diff --git "a/rpi5/result.json" "b/rpi5/result.json"
new file mode 100644--- /dev/null
+++ "b/rpi5/result.json"
@@ -0,0 +1,204686 @@
+{
+  "timestamp_utc": "2025-12-08T20:03:22.551221+00:00",
+  "bench_binary": "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+  "system": {
+    "hostname": "raspberrypi",
+    "platform": "Linux-6.12.34+rpt-rpi-2712-aarch64-with-glibc2.36",
+    "python": "3.11.2",
+    "cpu_count": 4,
+    "cpu_info": {
+      "lscpu": [
+        {
+          "field": "Architecture:",
+          "data": "aarch64"
+        },
+        {
+          "field": "CPU op-mode(s):",
+          "data": "32-bit, 64-bit"
+        },
+        {
+          "field": "Byte Order:",
+          "data": "Little Endian"
+        },
+        {
+          "field": "CPU(s):",
+          "data": "4"
+        },
+        {
+          "field": "On-line CPU(s) list:",
+          "data": "0-3"
+        },
+        {
+          "field": "Vendor ID:",
+          "data": "ARM"
+        },
+        {
+          "field": "Model name:",
+          "data": "Cortex-A76"
+        },
+        {
+          "field": "Model:",
+          "data": "1"
+        },
+        {
+          "field": "Thread(s) per core:",
+          "data": "1"
+        },
+        {
+          "field": "Core(s) per cluster:",
+          "data": "4"
+        },
+        {
+          "field": "Socket(s):",
+          "data": "-"
+        },
+        {
+          "field": "Cluster(s):",
+          "data": "1"
+        },
+        {
+          "field": "Stepping:",
+          "data": "r4p1"
+        },
+        {
+          "field": "CPU(s) scaling MHz:",
+          "data": "100%"
+        },
+        {
+          "field": "CPU max MHz:",
+          "data": "2400.0000"
+        },
+        {
+          "field": "CPU min MHz:",
+          "data": "1500.0000"
+        },
+        {
+          "field": "BogoMIPS:",
+          "data": "108.00"
+        },
+        {
+          "field": "Flags:",
+          "data": "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp"
+        },
+        {
+          "field": "L1d cache:",
+          "data": "256 KiB (4 instances)"
+        },
+        {
+          "field": "L1i cache:",
+          "data": "256 KiB (4 instances)"
+        },
+        {
+          "field": "L2 cache:",
+          "data": "2 MiB (4 instances)"
+        },
+        {
+          "field": "L3 cache:",
+          "data": "2 MiB (1 instance)"
+        },
+        {
+          "field": "NUMA node(s):",
+          "data": "8"
+        },
+        {
+          "field": "NUMA node0 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node1 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node2 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node3 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node4 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node5 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node6 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "NUMA node7 CPU(s):",
+          "data": "0-3"
+        },
+        {
+          "field": "Vulnerability Gather data sampling:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Indirect target selection:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Itlb multihit:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability L1tf:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Mds:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Meltdown:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Mmio stale data:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Reg file data sampling:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Retbleed:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Spec rstack overflow:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Spec store bypass:",
+          "data": "Mitigation; Speculative Store Bypass disabled via prctl"
+        },
+        {
+          "field": "Vulnerability Spectre v1:",
+          "data": "Mitigation; __user pointer sanitization"
+        },
+        {
+          "field": "Vulnerability Spectre v2:",
+          "data": "Mitigation; CSV2, BHB"
+        },
+        {
+          "field": "Vulnerability Srbds:",
+          "data": "Not affected"
+        },
+        {
+          "field": "Vulnerability Tsx async abort:",
+          "data": "Not affected"
+        }
+      ]
+    },
+    "total_ram_bytes": 8454881280
+  },
+  "runs": [
+    {
+      "timestamp_utc": "2025-12-08T20:24:43.504957+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:24:24Z\",\n    \"avg_ns\": 1708050119,\n    \"stddev_ns\": 3793891,\n    \"avg_ts\": 74.939499,\n    \"stddev_ts\": 0.166238,\n    \"samples_ns\": [ 1706180773, 1705553894, 1712415691 ],\n    \"samples_ts\": [ 75.0214, 75.0489, 74.7482 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:24:30Z\",\n    \"avg_ns\": 4162604391,\n    \"stddev_ns\": 6687761,\n    \"avg_ts\": 30.750032,\n    \"stddev_ts\": 0.049373,\n    \"samples_ns\": [ 4170023269, 4160751385, 4157038519 ],\n    \"samples_ts\": [ 30.6953, 30.7637, 30.7912 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:24:24Z",
+          "avg_ns": 1708050119,
+          "stddev_ns": 3793891,
+          "avg_ts": 74.939499,
+          "stddev_ts": 0.166238,
+          "samples_ns": [
+            1706180773,
+            1705553894,
+            1712415691
+          ],
+          "samples_ts": [
+            75.0214,
+            75.0489,
+            74.7482
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:24:30Z",
+          "avg_ns": 4162604391,
+          "stddev_ns": 6687761,
+          "avg_ts": 30.750032,
+          "stddev_ts": 0.049373,
+          "samples_ns": [
+            4170023269,
+            4160751385,
+            4157038519
+          ],
+          "samples_ts": [
+            30.6953,
+            30.7637,
+            30.7912
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 0
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:25:42.511612+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:24:44Z\",\n    \"avg_ns\": 1705299723,\n    \"stddev_ns\": 350191,\n    \"avg_ts\": 75.060121,\n    \"stddev_ts\": 0.015197,\n    \"samples_ns\": [ 1705691007, 1705037775, 1705170389 ],\n    \"samples_ts\": [ 75.0429, 75.0717, 75.0658 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:24:50Z\",\n    \"avg_ns\": 17137617759,\n    \"stddev_ns\": 6337087,\n    \"avg_ts\": 29.875800,\n    \"stddev_ts\": 0.011046,\n    \"samples_ns\": [ 17130782920, 17138775695, 17143294663 ],\n    \"samples_ts\": [ 29.8877, 29.8738, 29.8659 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:24:44Z",
+          "avg_ns": 1705299723,
+          "stddev_ns": 350191,
+          "avg_ts": 75.060121,
+          "stddev_ts": 0.015197,
+          "samples_ns": [
+            1705691007,
+            1705037775,
+            1705170389
+          ],
+          "samples_ts": [
+            75.0429,
+            75.0717,
+            75.0658
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:24:50Z",
+          "avg_ns": 17137617759,
+          "stddev_ns": 6337087,
+          "avg_ts": 29.8758,
+          "stddev_ts": 0.011046,
+          "samples_ns": [
+            17130782920,
+            17138775695,
+            17143294663
+          ],
+          "samples_ts": [
+            29.8877,
+            29.8738,
+            29.8659
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:26:23.728240+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:25:43Z\",\n    \"avg_ns\": 7001476404,\n    \"stddev_ns\": 200819,\n    \"avg_ts\": 73.127434,\n    \"stddev_ts\": 0.002097,\n    \"samples_ns\": [ 7001458021, 7001285408, 7001685783 ],\n    \"samples_ts\": [ 73.1276, 73.1294, 73.1252 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:26:11Z\",\n    \"avg_ns\": 4146525296,\n    \"stddev_ns\": 1496355,\n    \"avg_ts\": 30.869222,\n    \"stddev_ts\": 0.011137,\n    \"samples_ns\": [ 4148253085, 4145673142, 4145649661 ],\n    \"samples_ts\": [ 30.8564, 30.8756, 30.8757 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:25:43Z",
+          "avg_ns": 7001476404,
+          "stddev_ns": 200819,
+          "avg_ts": 73.127434,
+          "stddev_ts": 0.002097,
+          "samples_ns": [
+            7001458021,
+            7001285408,
+            7001685783
+          ],
+          "samples_ts": [
+            73.1276,
+            73.1294,
+            73.1252
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:26:11Z",
+          "avg_ns": 4146525296,
+          "stddev_ns": 1496355,
+          "avg_ts": 30.869222,
+          "stddev_ts": 0.011137,
+          "samples_ns": [
+            4148253085,
+            4145673142,
+            4145649661
+          ],
+          "samples_ts": [
+            30.8564,
+            30.8756,
+            30.8757
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 2
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:27:43.734241+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:26:24Z\",\n    \"avg_ns\": 7010795962,\n    \"stddev_ns\": 455739,\n    \"avg_ts\": 73.030224,\n    \"stddev_ts\": 0.004747,\n    \"samples_ns\": [ 7010346197, 7011257450, 7010784239 ],\n    \"samples_ts\": [ 73.0349, 73.0254, 73.0303 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:26:52Z\",\n    \"avg_ns\": 17062348560,\n    \"stddev_ns\": 26609810,\n    \"avg_ts\": 30.007641,\n    \"stddev_ts\": 0.046771,\n    \"samples_ns\": [ 17055403436, 17091741613, 17039900633 ],\n    \"samples_ts\": [ 30.0198, 29.956, 30.0471 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:26:24Z",
+          "avg_ns": 7010795962,
+          "stddev_ns": 455739,
+          "avg_ts": 73.030224,
+          "stddev_ts": 0.004747,
+          "samples_ns": [
+            7010346197,
+            7011257450,
+            7010784239
+          ],
+          "samples_ts": [
+            73.0349,
+            73.0254,
+            73.0303
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:26:52Z",
+          "avg_ns": 17062348560,
+          "stddev_ns": 26609810,
+          "avg_ts": 30.007641,
+          "stddev_ts": 0.046771,
+          "samples_ns": [
+            17055403436,
+            17091741613,
+            17039900633
+          ],
+          "samples_ts": [
+            30.0198,
+            29.956,
+            30.0471
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 3
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:28:03.811762+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:27:44Z\",\n    \"avg_ns\": 1703846957,\n    \"stddev_ns\": 556012,\n    \"avg_ts\": 75.124123,\n    \"stddev_ts\": 0.024517,\n    \"samples_ns\": [ 1704365322, 1703259714, 1703915835 ],\n    \"samples_ts\": [ 75.1013, 75.15, 75.1211 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:27:51Z\",\n    \"avg_ns\": 4163695750,\n    \"stddev_ns\": 296574,\n    \"avg_ts\": 30.741920,\n    \"stddev_ts\": 0.002083,\n    \"samples_ns\": [ 4163631960, 4164004372, 4163450920 ],\n    \"samples_ts\": [ 30.7424, 30.7396, 30.7437 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:27:44Z",
+          "avg_ns": 1703846957,
+          "stddev_ns": 556012,
+          "avg_ts": 75.124123,
+          "stddev_ts": 0.024517,
+          "samples_ns": [
+            1704365322,
+            1703259714,
+            1703915835
+          ],
+          "samples_ts": [
+            75.1013,
+            75.15,
+            75.1211
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:27:51Z",
+          "avg_ns": 4163695750,
+          "stddev_ns": 296574,
+          "avg_ts": 30.74192,
+          "stddev_ts": 0.002083,
+          "samples_ns": [
+            4163631960,
+            4164004372,
+            4163450920
+          ],
+          "samples_ts": [
+            30.7424,
+            30.7396,
+            30.7437
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 4
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:29:02.770664+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:28:04Z\",\n    \"avg_ns\": 1703759789,\n    \"stddev_ns\": 20170,\n    \"avg_ts\": 75.127962,\n    \"stddev_ts\": 0.000889,\n    \"samples_ns\": [ 1703755610, 1703781722, 1703742035 ],\n    \"samples_ts\": [ 75.1281, 75.127, 75.1287 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:28:11Z\",\n    \"avg_ns\": 17121109566,\n    \"stddev_ns\": 9762306,\n    \"avg_ts\": 29.904610,\n    \"stddev_ts\": 0.017054,\n    \"samples_ns\": [ 17109902545, 17125678480, 17127747675 ],\n    \"samples_ts\": [ 29.9242, 29.8966, 29.893 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:28:04Z",
+          "avg_ns": 1703759789,
+          "stddev_ns": 20170,
+          "avg_ts": 75.127962,
+          "stddev_ts": 0.000889,
+          "samples_ns": [
+            1703755610,
+            1703781722,
+            1703742035
+          ],
+          "samples_ts": [
+            75.1281,
+            75.127,
+            75.1287
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:28:11Z",
+          "avg_ns": 17121109566,
+          "stddev_ns": 9762306,
+          "avg_ts": 29.90461,
+          "stddev_ts": 0.017054,
+          "samples_ns": [
+            17109902545,
+            17125678480,
+            17127747675
+          ],
+          "samples_ts": [
+            29.9242,
+            29.8966,
+            29.893
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 5
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:29:43.987782+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:29:03Z\",\n    \"avg_ns\": 7016673493,\n    \"stddev_ns\": 1102322,\n    \"avg_ts\": 72.969051,\n    \"stddev_ts\": 0.011464,\n    \"samples_ns\": [ 7015482082, 7016881230, 7017657167 ],\n    \"samples_ts\": [ 72.9814, 72.9669, 72.9588 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:29:31Z\",\n    \"avg_ns\": 4125206112,\n    \"stddev_ns\": 1332113,\n    \"avg_ts\": 31.028755,\n    \"stddev_ts\": 0.009995,\n    \"samples_ns\": [ 4124878028, 4126668441, 4124071869 ],\n    \"samples_ts\": [ 31.0312, 31.0178, 31.0373 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:29:03Z",
+          "avg_ns": 7016673493,
+          "stddev_ns": 1102322,
+          "avg_ts": 72.969051,
+          "stddev_ts": 0.011464,
+          "samples_ns": [
+            7015482082,
+            7016881230,
+            7017657167
+          ],
+          "samples_ts": [
+            72.9814,
+            72.9669,
+            72.9588
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:29:31Z",
+          "avg_ns": 4125206112,
+          "stddev_ns": 1332113,
+          "avg_ts": 31.028755,
+          "stddev_ts": 0.009995,
+          "samples_ns": [
+            4124878028,
+            4126668441,
+            4124071869
+          ],
+          "samples_ts": [
+            31.0312,
+            31.0178,
+            31.0373
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 6
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:31:03.952283+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:29:44Z\",\n    \"avg_ns\": 7023906466,\n    \"stddev_ns\": 536852,\n    \"avg_ts\": 72.893909,\n    \"stddev_ts\": 0.005434,\n    \"samples_ns\": [ 7023512708, 7024500685, 7023706007 ],\n    \"samples_ts\": [ 72.898, 72.8877, 72.896 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:30:12Z\",\n    \"avg_ns\": 17030511469,\n    \"stddev_ns\": 23989276,\n    \"avg_ts\": 30.063729,\n    \"stddev_ts\": 0.042380,\n    \"samples_ns\": [ 17002959652, 17041809196, 17046765560 ],\n    \"samples_ts\": [ 30.1124, 30.0438, 30.035 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:29:44Z",
+          "avg_ns": 7023906466,
+          "stddev_ns": 536852,
+          "avg_ts": 72.893909,
+          "stddev_ts": 0.005434,
+          "samples_ns": [
+            7023512708,
+            7024500685,
+            7023706007
+          ],
+          "samples_ts": [
+            72.898,
+            72.8877,
+            72.896
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:30:12Z",
+          "avg_ns": 17030511469,
+          "stddev_ns": 23989276,
+          "avg_ts": 30.063729,
+          "stddev_ts": 0.04238,
+          "samples_ns": [
+            17002959652,
+            17041809196,
+            17046765560
+          ],
+          "samples_ts": [
+            30.1124,
+            30.0438,
+            30.035
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 7
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:31:23.911398+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:31:04Z\",\n    \"avg_ns\": 1703396179,\n    \"stddev_ns\": 707283,\n    \"avg_ts\": 75.144007,\n    \"stddev_ts\": 0.031101,\n    \"samples_ns\": [ 1702603854, 1703953613, 1703631072 ],\n    \"samples_ts\": [ 75.179, 75.1194, 75.1336 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:31:11Z\",\n    \"avg_ns\": 4124576863,\n    \"stddev_ns\": 1110120,\n    \"avg_ts\": 31.033488,\n    \"stddev_ts\": 0.008337,\n    \"samples_ns\": [ 4125852411, 4124028406, 4123849773 ],\n    \"samples_ts\": [ 31.0239, 31.0376, 31.039 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:31:04Z",
+          "avg_ns": 1703396179,
+          "stddev_ns": 707283,
+          "avg_ts": 75.144007,
+          "stddev_ts": 0.031101,
+          "samples_ns": [
+            1702603854,
+            1703953613,
+            1703631072
+          ],
+          "samples_ts": [
+            75.179,
+            75.1194,
+            75.1336
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:31:11Z",
+          "avg_ns": 4124576863,
+          "stddev_ns": 1110120,
+          "avg_ts": 31.033488,
+          "stddev_ts": 0.008337,
+          "samples_ns": [
+            4125852411,
+            4124028406,
+            4123849773
+          ],
+          "samples_ts": [
+            31.0239,
+            31.0376,
+            31.039
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 8
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:32:22.651509+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:31:24Z\",\n    \"avg_ns\": 1702907913,\n    \"stddev_ns\": 269645,\n    \"avg_ts\": 75.165545,\n    \"stddev_ts\": 0.011621,\n    \"samples_ns\": [ 1702604947, 1703037915, 1703080879 ],\n    \"samples_ts\": [ 75.1789, 75.1598, 75.1579 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:31:31Z\",\n    \"avg_ns\": 17049756038,\n    \"stddev_ns\": 23425561,\n    \"avg_ts\": 30.029793,\n    \"stddev_ts\": 0.041227,\n    \"samples_ns\": [ 17076706113, 17034278502, 17038283500 ],\n    \"samples_ts\": [ 29.9824, 30.057, 30.05 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:31:24Z",
+          "avg_ns": 1702907913,
+          "stddev_ns": 269645,
+          "avg_ts": 75.165545,
+          "stddev_ts": 0.011621,
+          "samples_ns": [
+            1702604947,
+            1703037915,
+            1703080879
+          ],
+          "samples_ts": [
+            75.1789,
+            75.1598,
+            75.1579
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:31:31Z",
+          "avg_ns": 17049756038,
+          "stddev_ns": 23425561,
+          "avg_ts": 30.029793,
+          "stddev_ts": 0.041227,
+          "samples_ns": [
+            17076706113,
+            17034278502,
+            17038283500
+          ],
+          "samples_ts": [
+            29.9824,
+            30.057,
+            30.05
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 9
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:33:04.981699+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:32:23Z\",\n    \"avg_ns\": 7269167702,\n    \"stddev_ns\": 339811,\n    \"avg_ts\": 70.434474,\n    \"stddev_ts\": 0.003187,\n    \"samples_ns\": [ 7269380224, 7268788809, 7269334074 ],\n    \"samples_ts\": [ 70.4324, 70.4381, 70.4329 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:32:52Z\",\n    \"avg_ns\": 4159241036,\n    \"stddev_ns\": 549880,\n    \"avg_ts\": 30.774846,\n    \"stddev_ts\": 0.004040,\n    \"samples_ns\": [ 4158762190, 4159125158, 4159835761 ],\n    \"samples_ts\": [ 30.7784, 30.7757, 30.7704 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:32:23Z",
+          "avg_ns": 7269167702,
+          "stddev_ns": 339811,
+          "avg_ts": 70.434474,
+          "stddev_ts": 0.003187,
+          "samples_ns": [
+            7269380224,
+            7268788809,
+            7269334074
+          ],
+          "samples_ts": [
+            70.4324,
+            70.4381,
+            70.4329
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:32:52Z",
+          "avg_ns": 4159241036,
+          "stddev_ns": 549880,
+          "avg_ts": 30.774846,
+          "stddev_ts": 0.00404,
+          "samples_ns": [
+            4158762190,
+            4159125158,
+            4159835761
+          ],
+          "samples_ts": [
+            30.7784,
+            30.7757,
+            30.7704
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 10
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:34:26.378858+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:33:05Z\",\n    \"avg_ns\": 7297553864,\n    \"stddev_ns\": 348150,\n    \"avg_ts\": 70.160496,\n    \"stddev_ts\": 0.003245,\n    \"samples_ns\": [ 7297408825, 7297939647, 7297313121 ],\n    \"samples_ts\": [ 70.1619, 70.1568, 70.1628 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:33:34Z\",\n    \"avg_ns\": 17142932744,\n    \"stddev_ns\": 57568592,\n    \"avg_ts\": 29.866760,\n    \"stddev_ts\": 0.100491,\n    \"samples_ns\": [ 17076460980, 17175681861, 17176655393 ],\n    \"samples_ts\": [ 29.9828, 29.8096, 29.8079 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:33:05Z",
+          "avg_ns": 7297553864,
+          "stddev_ns": 348150,
+          "avg_ts": 70.160496,
+          "stddev_ts": 0.003245,
+          "samples_ns": [
+            7297408825,
+            7297939647,
+            7297313121
+          ],
+          "samples_ts": [
+            70.1619,
+            70.1568,
+            70.1628
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:33:34Z",
+          "avg_ns": 17142932744,
+          "stddev_ns": 57568592,
+          "avg_ts": 29.86676,
+          "stddev_ts": 0.100491,
+          "samples_ns": [
+            17076460980,
+            17175681861,
+            17176655393
+          ],
+          "samples_ts": [
+            29.9828,
+            29.8096,
+            29.8079
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 11
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:34:46.366244+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:34:27Z\",\n    \"avg_ns\": 1705057356,\n    \"stddev_ns\": 394219,\n    \"avg_ts\": 75.070791,\n    \"stddev_ts\": 0.017358,\n    \"samples_ns\": [ 1704640659, 1705424392, 1705107017 ],\n    \"samples_ts\": [ 75.0891, 75.0546, 75.0686 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:34:33Z\",\n    \"avg_ns\": 4131878641,\n    \"stddev_ns\": 609983,\n    \"avg_ts\": 30.978645,\n    \"stddev_ts\": 0.004548,\n    \"samples_ns\": [ 4132537002, 4131342412, 4131756510 ],\n    \"samples_ts\": [ 30.9737, 30.9827, 30.9796 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:34:27Z",
+          "avg_ns": 1705057356,
+          "stddev_ns": 394219,
+          "avg_ts": 75.070791,
+          "stddev_ts": 0.017358,
+          "samples_ns": [
+            1704640659,
+            1705424392,
+            1705107017
+          ],
+          "samples_ts": [
+            75.0891,
+            75.0546,
+            75.0686
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:34:33Z",
+          "avg_ns": 4131878641,
+          "stddev_ns": 609983,
+          "avg_ts": 30.978645,
+          "stddev_ts": 0.004548,
+          "samples_ns": [
+            4132537002,
+            4131342412,
+            4131756510
+          ],
+          "samples_ts": [
+            30.9737,
+            30.9827,
+            30.9796
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 12
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:35:45.290287+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:34:47Z\",\n    \"avg_ns\": 1704141350,\n    \"stddev_ns\": 83725,\n    \"avg_ts\": 75.111140,\n    \"stddev_ts\": 0.003690,\n    \"samples_ns\": [ 1704091244, 1704238006, 1704094800 ],\n    \"samples_ts\": [ 75.1133, 75.1069, 75.1132 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:34:53Z\",\n    \"avg_ns\": 17111524862,\n    \"stddev_ns\": 15104520,\n    \"avg_ts\": 29.921370,\n    \"stddev_ts\": 0.026423,\n    \"samples_ns\": [ 17094120257, 17119266911, 17121187420 ],\n    \"samples_ts\": [ 29.9518, 29.9078, 29.9045 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:34:47Z",
+          "avg_ns": 1704141350,
+          "stddev_ns": 83725,
+          "avg_ts": 75.11114,
+          "stddev_ts": 0.00369,
+          "samples_ns": [
+            1704091244,
+            1704238006,
+            1704094800
+          ],
+          "samples_ts": [
+            75.1133,
+            75.1069,
+            75.1132
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:34:53Z",
+          "avg_ns": 17111524862,
+          "stddev_ns": 15104520,
+          "avg_ts": 29.92137,
+          "stddev_ts": 0.026423,
+          "samples_ns": [
+            17094120257,
+            17119266911,
+            17121187420
+          ],
+          "samples_ts": [
+            29.9518,
+            29.9078,
+            29.9045
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 13
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:36:26.525324+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:35:45Z\",\n    \"avg_ns\": 7008650359,\n    \"stddev_ns\": 1003246,\n    \"avg_ts\": 73.052582,\n    \"stddev_ts\": 0.010458,\n    \"samples_ns\": [ 7009442518, 7007522251, 7008986308 ],\n    \"samples_ts\": [ 73.0443, 73.0643, 73.0491 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:36:13Z\",\n    \"avg_ns\": 4140625617,\n    \"stddev_ns\": 174762,\n    \"avg_ts\": 30.913203,\n    \"stddev_ts\": 0.001114,\n    \"samples_ns\": [ 4140531314, 4140797633, 4140547906 ],\n    \"samples_ts\": [ 30.9139, 30.9119, 30.9138 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:35:45Z",
+          "avg_ns": 7008650359,
+          "stddev_ns": 1003246,
+          "avg_ts": 73.052582,
+          "stddev_ts": 0.010458,
+          "samples_ns": [
+            7009442518,
+            7007522251,
+            7008986308
+          ],
+          "samples_ts": [
+            73.0443,
+            73.0643,
+            73.0491
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:36:13Z",
+          "avg_ns": 4140625617,
+          "stddev_ns": 174762,
+          "avg_ts": 30.913203,
+          "stddev_ts": 0.001114,
+          "samples_ns": [
+            4140531314,
+            4140797633,
+            4140547906
+          ],
+          "samples_ts": [
+            30.9139,
+            30.9119,
+            30.9138
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 14
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:37:46.111602+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:36:27Z\",\n    \"avg_ns\": 6997039294,\n    \"stddev_ns\": 575160,\n    \"avg_ts\": 73.173807,\n    \"stddev_ts\": 0.005951,\n    \"samples_ns\": [ 6997246352, 6996395712, 6997475819 ],\n    \"samples_ts\": [ 73.1716, 73.1805, 73.1692 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:36:55Z\",\n    \"avg_ns\": 16937813621,\n    \"stddev_ns\": 17060249,\n    \"avg_ts\": 30.228243,\n    \"stddev_ts\": 0.030464,\n    \"samples_ns\": [ 16918115429, 16947517456, 16947807979 ],\n    \"samples_ts\": [ 30.2634, 30.2109, 30.2104 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:36:27Z",
+          "avg_ns": 6997039294,
+          "stddev_ns": 575160,
+          "avg_ts": 73.173807,
+          "stddev_ts": 0.005951,
+          "samples_ns": [
+            6997246352,
+            6996395712,
+            6997475819
+          ],
+          "samples_ts": [
+            73.1716,
+            73.1805,
+            73.1692
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:36:55Z",
+          "avg_ns": 16937813621,
+          "stddev_ns": 17060249,
+          "avg_ts": 30.228243,
+          "stddev_ts": 0.030464,
+          "samples_ns": [
+            16918115429,
+            16947517456,
+            16947807979
+          ],
+          "samples_ts": [
+            30.2634,
+            30.2109,
+            30.2104
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 15
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:38:06.099859+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:37:46Z\",\n    \"avg_ns\": 1704734142,\n    \"stddev_ns\": 47076,\n    \"avg_ts\": 75.085022,\n    \"stddev_ts\": 0.000996,\n    \"samples_ns\": [ 1704759921, 1704717642, 1704724864 ],\n    \"samples_ts\": [ 75.0839, 75.0857, 75.0854 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:37:53Z\",\n    \"avg_ns\": 4130455572,\n    \"stddev_ns\": 226292,\n    \"avg_ts\": 30.989318,\n    \"stddev_ts\": 0.001628,\n    \"samples_ns\": [ 4130345102, 4130316064, 4130705551 ],\n    \"samples_ts\": [ 30.9901, 30.9904, 30.9874 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:37:46Z",
+          "avg_ns": 1704734142,
+          "stddev_ns": 47076,
+          "avg_ts": 75.085022,
+          "stddev_ts": 0.000996,
+          "samples_ns": [
+            1704759921,
+            1704717642,
+            1704724864
+          ],
+          "samples_ts": [
+            75.0839,
+            75.0857,
+            75.0854
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:37:53Z",
+          "avg_ns": 4130455572,
+          "stddev_ns": 226292,
+          "avg_ts": 30.989318,
+          "stddev_ts": 0.001628,
+          "samples_ns": [
+            4130345102,
+            4130316064,
+            4130705551
+          ],
+          "samples_ts": [
+            30.9901,
+            30.9904,
+            30.9874
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 16
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:39:05.191511+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:38:06Z\",\n    \"avg_ns\": 1704274710,\n    \"stddev_ns\": 269310,\n    \"avg_ts\": 75.105264,\n    \"stddev_ts\": 0.011867,\n    \"samples_ns\": [ 1704094763, 1704145042, 1704584325 ],\n    \"samples_ts\": [ 75.1132, 75.111, 75.0916 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:38:13Z\",\n    \"avg_ns\": 17166102472,\n    \"stddev_ns\": 30998701,\n    \"avg_ts\": 29.826288,\n    \"stddev_ts\": 0.053917,\n    \"samples_ns\": [ 17130315284, 17184610389, 17183381743 ],\n    \"samples_ts\": [ 29.8885, 29.7941, 29.7962 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:38:06Z",
+          "avg_ns": 1704274710,
+          "stddev_ns": 269310,
+          "avg_ts": 75.105264,
+          "stddev_ts": 0.011867,
+          "samples_ns": [
+            1704094763,
+            1704145042,
+            1704584325
+          ],
+          "samples_ts": [
+            75.1132,
+            75.111,
+            75.0916
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:38:13Z",
+          "avg_ns": 17166102472,
+          "stddev_ns": 30998701,
+          "avg_ts": 29.826288,
+          "stddev_ts": 0.053917,
+          "samples_ns": [
+            17130315284,
+            17184610389,
+            17183381743
+          ],
+          "samples_ts": [
+            29.8885,
+            29.7941,
+            29.7962
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 17
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:39:46.443495+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:39:05Z\",\n    \"avg_ns\": 7020994242,\n    \"stddev_ns\": 636133,\n    \"avg_ts\": 72.924145,\n    \"stddev_ts\": 0.006550,\n    \"samples_ns\": [ 7020297536, 7021525885, 7021159306 ],\n    \"samples_ts\": [ 72.9314, 72.9186, 72.9224 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:39:33Z\",\n    \"avg_ns\": 4130599370,\n    \"stddev_ns\": 481995,\n    \"avg_ts\": 30.988239,\n    \"stddev_ts\": 0.003551,\n    \"samples_ns\": [ 4130115599, 4131061556, 4130620957 ],\n    \"samples_ts\": [ 30.9919, 30.9848, 30.9881 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:39:05Z",
+          "avg_ns": 7020994242,
+          "stddev_ns": 636133,
+          "avg_ts": 72.924145,
+          "stddev_ts": 0.00655,
+          "samples_ns": [
+            7020297536,
+            7021525885,
+            7021159306
+          ],
+          "samples_ts": [
+            72.9314,
+            72.9186,
+            72.9224
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:39:33Z",
+          "avg_ns": 4130599370,
+          "stddev_ns": 481995,
+          "avg_ts": 30.988239,
+          "stddev_ts": 0.003551,
+          "samples_ns": [
+            4130115599,
+            4131061556,
+            4130620957
+          ],
+          "samples_ts": [
+            30.9919,
+            30.9848,
+            30.9881
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 18
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:41:06.733522+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:39:47Z\",\n    \"avg_ns\": 7031929495,\n    \"stddev_ns\": 510113,\n    \"avg_ts\": 72.810742,\n    \"stddev_ts\": 0.005210,\n    \"samples_ns\": [ 7032346014, 7032072046, 7031370426 ],\n    \"samples_ts\": [ 72.8064, 72.8093, 72.8165 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:40:15Z\",\n    \"avg_ns\": 17126419706,\n    \"stddev_ns\": 14453043,\n    \"avg_ts\": 29.895346,\n    \"stddev_ts\": 0.025219,\n    \"samples_ns\": [ 17142790360, 17115425156, 17121043602 ],\n    \"samples_ts\": [ 29.8668, 29.9145, 29.9047 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:39:47Z",
+          "avg_ns": 7031929495,
+          "stddev_ns": 510113,
+          "avg_ts": 72.810742,
+          "stddev_ts": 0.00521,
+          "samples_ns": [
+            7032346014,
+            7032072046,
+            7031370426
+          ],
+          "samples_ts": [
+            72.8064,
+            72.8093,
+            72.8165
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:40:15Z",
+          "avg_ns": 17126419706,
+          "stddev_ns": 14453043,
+          "avg_ts": 29.895346,
+          "stddev_ts": 0.025219,
+          "samples_ns": [
+            17142790360,
+            17115425156,
+            17121043602
+          ],
+          "samples_ts": [
+            29.8668,
+            29.9145,
+            29.9047
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 19
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:41:26.750157+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:41:07Z\",\n    \"avg_ns\": 1704095337,\n    \"stddev_ns\": 345886,\n    \"avg_ts\": 75.113170,\n    \"stddev_ts\": 0.015245,\n    \"samples_ns\": [ 1703771092, 1704055503, 1704459416 ],\n    \"samples_ts\": [ 75.1275, 75.1149, 75.0971 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:41:14Z\",\n    \"avg_ns\": 4142214979,\n    \"stddev_ns\": 531827,\n    \"avg_ts\": 30.901342,\n    \"stddev_ts\": 0.003967,\n    \"samples_ns\": [ 4142134317, 4142782530, 4141728090 ],\n    \"samples_ts\": [ 30.9019, 30.8971, 30.905 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:41:07Z",
+          "avg_ns": 1704095337,
+          "stddev_ns": 345886,
+          "avg_ts": 75.11317,
+          "stddev_ts": 0.015245,
+          "samples_ns": [
+            1703771092,
+            1704055503,
+            1704459416
+          ],
+          "samples_ts": [
+            75.1275,
+            75.1149,
+            75.0971
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:41:14Z",
+          "avg_ns": 4142214979,
+          "stddev_ns": 531827,
+          "avg_ts": 30.901342,
+          "stddev_ts": 0.003967,
+          "samples_ns": [
+            4142134317,
+            4142782530,
+            4141728090
+          ],
+          "samples_ts": [
+            30.9019,
+            30.8971,
+            30.905
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 20
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:42:25.053049+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:41:27Z\",\n    \"avg_ns\": 1704052639,\n    \"stddev_ns\": 413011,\n    \"avg_ts\": 75.115053,\n    \"stddev_ts\": 0.018207,\n    \"samples_ns\": [ 1703599108, 1704407119, 1704151690 ],\n    \"samples_ts\": [ 75.135, 75.0994, 75.1107 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:41:34Z\",\n    \"avg_ns\": 16903511816,\n    \"stddev_ns\": 602702,\n    \"avg_ts\": 30.289564,\n    \"stddev_ts\": 0.001029,\n    \"samples_ns\": [ 16904087701, 16903507971, 16902939778 ],\n    \"samples_ts\": [ 30.2885, 30.2896, 30.2906 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:41:27Z",
+          "avg_ns": 1704052639,
+          "stddev_ns": 413011,
+          "avg_ts": 75.115053,
+          "stddev_ts": 0.018207,
+          "samples_ns": [
+            1703599108,
+            1704407119,
+            1704151690
+          ],
+          "samples_ts": [
+            75.135,
+            75.0994,
+            75.1107
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:41:34Z",
+          "avg_ns": 16903511816,
+          "stddev_ns": 602702,
+          "avg_ts": 30.289564,
+          "stddev_ts": 0.001029,
+          "samples_ns": [
+            16904087701,
+            16903507971,
+            16902939778
+          ],
+          "samples_ts": [
+            30.2885,
+            30.2896,
+            30.2906
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 21
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:43:07.375103+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:42:25Z\",\n    \"avg_ns\": 7281718699,\n    \"stddev_ns\": 1167709,\n    \"avg_ts\": 70.313072,\n    \"stddev_ts\": 0.011277,\n    \"samples_ns\": [ 7282443956, 7282340473, 7280371668 ],\n    \"samples_ts\": [ 70.3061, 70.3071, 70.3261 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:42:54Z\",\n    \"avg_ns\": 4139053460,\n    \"stddev_ns\": 665206,\n    \"avg_ts\": 30.924945,\n    \"stddev_ts\": 0.004924,\n    \"samples_ns\": [ 4139195907, 4138334933, 4139629542 ],\n    \"samples_ts\": [ 30.9239, 30.9303, 30.9206 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:42:25Z",
+          "avg_ns": 7281718699,
+          "stddev_ns": 1167709,
+          "avg_ts": 70.313072,
+          "stddev_ts": 0.011277,
+          "samples_ns": [
+            7282443956,
+            7282340473,
+            7280371668
+          ],
+          "samples_ts": [
+            70.3061,
+            70.3071,
+            70.3261
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:42:54Z",
+          "avg_ns": 4139053460,
+          "stddev_ns": 665206,
+          "avg_ts": 30.924945,
+          "stddev_ts": 0.004924,
+          "samples_ns": [
+            4139195907,
+            4138334933,
+            4139629542
+          ],
+          "samples_ts": [
+            30.9239,
+            30.9303,
+            30.9206
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 22
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:44:28.595374+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:43:08Z\",\n    \"avg_ns\": 7293532667,\n    \"stddev_ns\": 6765835,\n    \"avg_ts\": 70.199219,\n    \"stddev_ts\": 0.065144,\n    \"samples_ns\": [ 7285734871, 7297034431, 7297828701 ],\n    \"samples_ts\": [ 70.2743, 70.1655, 70.1579 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:43:37Z\",\n    \"avg_ns\": 17090149900,\n    \"stddev_ns\": 47943505,\n    \"avg_ts\": 29.958935,\n    \"stddev_ts\": 0.083942,\n    \"samples_ns\": [ 17143954042, 17074535676, 17051959982 ],\n    \"samples_ts\": [ 29.8648, 29.9862, 30.0259 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:43:08Z",
+          "avg_ns": 7293532667,
+          "stddev_ns": 6765835,
+          "avg_ts": 70.199219,
+          "stddev_ts": 0.065144,
+          "samples_ns": [
+            7285734871,
+            7297034431,
+            7297828701
+          ],
+          "samples_ts": [
+            70.2743,
+            70.1655,
+            70.1579
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:43:37Z",
+          "avg_ns": 17090149900,
+          "stddev_ns": 47943505,
+          "avg_ts": 29.958935,
+          "stddev_ts": 0.083942,
+          "samples_ns": [
+            17143954042,
+            17074535676,
+            17051959982
+          ],
+          "samples_ts": [
+            29.8648,
+            29.9862,
+            30.0259
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 23
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:44:48.609233+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:44:29Z\",\n    \"avg_ns\": 1703813339,\n    \"stddev_ns\": 120842,\n    \"avg_ts\": 75.125601,\n    \"stddev_ts\": 0.005328,\n    \"samples_ns\": [ 1703840074, 1703918575, 1703681368 ],\n    \"samples_ts\": [ 75.1244, 75.121, 75.1314 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:44:36Z\",\n    \"avg_ns\": 4125481147,\n    \"stddev_ns\": 881168,\n    \"avg_ts\": 31.026685,\n    \"stddev_ts\": 0.006626,\n    \"samples_ns\": [ 4124800193, 4126476364, 4125166884 ],\n    \"samples_ts\": [ 31.0318, 31.0192, 31.029 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:44:29Z",
+          "avg_ns": 1703813339,
+          "stddev_ns": 120842,
+          "avg_ts": 75.125601,
+          "stddev_ts": 0.005328,
+          "samples_ns": [
+            1703840074,
+            1703918575,
+            1703681368
+          ],
+          "samples_ts": [
+            75.1244,
+            75.121,
+            75.1314
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:44:36Z",
+          "avg_ns": 4125481147,
+          "stddev_ns": 881168,
+          "avg_ts": 31.026685,
+          "stddev_ts": 0.006626,
+          "samples_ns": [
+            4124800193,
+            4126476364,
+            4125166884
+          ],
+          "samples_ts": [
+            31.0318,
+            31.0192,
+            31.029
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 24
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:45:47.535321+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:44:49Z\",\n    \"avg_ns\": 1703837389,\n    \"stddev_ns\": 274907,\n    \"avg_ts\": 75.124541,\n    \"stddev_ts\": 0.012122,\n    \"samples_ns\": [ 1704083059, 1703888649, 1703540459 ],\n    \"samples_ts\": [ 75.1137, 75.1223, 75.1376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:44:56Z\",\n    \"avg_ns\": 17095298920,\n    \"stddev_ns\": 17637542,\n    \"avg_ts\": 29.949775,\n    \"stddev_ts\": 0.030881,\n    \"samples_ns\": [ 17115545028, 17087078513, 17083273221 ],\n    \"samples_ts\": [ 29.9143, 29.9642, 29.9708 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:44:49Z",
+          "avg_ns": 1703837389,
+          "stddev_ns": 274907,
+          "avg_ts": 75.124541,
+          "stddev_ts": 0.012122,
+          "samples_ns": [
+            1704083059,
+            1703888649,
+            1703540459
+          ],
+          "samples_ts": [
+            75.1137,
+            75.1223,
+            75.1376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:44:56Z",
+          "avg_ns": 17095298920,
+          "stddev_ns": 17637542,
+          "avg_ts": 29.949775,
+          "stddev_ts": 0.030881,
+          "samples_ns": [
+            17115545028,
+            17087078513,
+            17083273221
+          ],
+          "samples_ts": [
+            29.9143,
+            29.9642,
+            29.9708
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 25
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:46:28.668336+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:45:48Z\",\n    \"avg_ns\": 6997944245,\n    \"stddev_ns\": 715674,\n    \"avg_ts\": 73.164345,\n    \"stddev_ts\": 0.007482,\n    \"samples_ns\": [ 6997571783, 6998769337, 6997491615 ],\n    \"samples_ts\": [ 73.1682, 73.1557, 73.1691 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:46:16Z\",\n    \"avg_ns\": 4117905939,\n    \"stddev_ns\": 696880,\n    \"avg_ts\": 31.083761,\n    \"stddev_ts\": 0.005215,\n    \"samples_ns\": [ 4117413945, 4117608004, 4118695870 ],\n    \"samples_ts\": [ 31.0875, 31.086, 31.0778 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:45:48Z",
+          "avg_ns": 6997944245,
+          "stddev_ns": 715674,
+          "avg_ts": 73.164345,
+          "stddev_ts": 0.007482,
+          "samples_ns": [
+            6997571783,
+            6998769337,
+            6997491615
+          ],
+          "samples_ts": [
+            73.1682,
+            73.1557,
+            73.1691
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:46:16Z",
+          "avg_ns": 4117905939,
+          "stddev_ns": 696880,
+          "avg_ts": 31.083761,
+          "stddev_ts": 0.005215,
+          "samples_ns": [
+            4117413945,
+            4117608004,
+            4118695870
+          ],
+          "samples_ts": [
+            31.0875,
+            31.086,
+            31.0778
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 26
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:47:48.451782+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:46:29Z\",\n    \"avg_ns\": 6998207063,\n    \"stddev_ns\": 148542,\n    \"avg_ts\": 73.161596,\n    \"stddev_ts\": 0.001283,\n    \"samples_ns\": [ 6998067345, 6998256292, 6998297553 ],\n    \"samples_ts\": [ 73.1631, 73.1611, 73.1607 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:46:57Z\",\n    \"avg_ns\": 16995962723,\n    \"stddev_ns\": 30526396,\n    \"avg_ts\": 30.124866,\n    \"stddev_ts\": 0.054051,\n    \"samples_ns\": [ 17031209201, 16978668489, 16978010480 ],\n    \"samples_ts\": [ 30.0625, 30.1555, 30.1567 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:46:29Z",
+          "avg_ns": 6998207063,
+          "stddev_ns": 148542,
+          "avg_ts": 73.161596,
+          "stddev_ts": 0.001283,
+          "samples_ns": [
+            6998067345,
+            6998256292,
+            6998297553
+          ],
+          "samples_ts": [
+            73.1631,
+            73.1611,
+            73.1607
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:46:57Z",
+          "avg_ns": 16995962723,
+          "stddev_ns": 30526396,
+          "avg_ts": 30.124866,
+          "stddev_ts": 0.054051,
+          "samples_ns": [
+            17031209201,
+            16978668489,
+            16978010480
+          ],
+          "samples_ts": [
+            30.0625,
+            30.1555,
+            30.1567
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 27
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:48:08.457347+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:47:49Z\",\n    \"avg_ns\": 1703098002,\n    \"stddev_ns\": 179191,\n    \"avg_ts\": 75.157155,\n    \"stddev_ts\": 0.007696,\n    \"samples_ns\": [ 1703192325, 1703204899, 1702896783 ],\n    \"samples_ts\": [ 75.153, 75.1524, 75.166 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:47:55Z\",\n    \"avg_ns\": 4138012879,\n    \"stddev_ns\": 857528,\n    \"avg_ts\": 30.932722,\n    \"stddev_ts\": 0.006411,\n    \"samples_ns\": [ 4138342192, 4138656937, 4137039508 ],\n    \"samples_ts\": [ 30.9303, 30.9279, 30.94 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:47:49Z",
+          "avg_ns": 1703098002,
+          "stddev_ns": 179191,
+          "avg_ts": 75.157155,
+          "stddev_ts": 0.007696,
+          "samples_ns": [
+            1703192325,
+            1703204899,
+            1702896783
+          ],
+          "samples_ts": [
+            75.153,
+            75.1524,
+            75.166
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:47:55Z",
+          "avg_ns": 4138012879,
+          "stddev_ns": 857528,
+          "avg_ts": 30.932722,
+          "stddev_ts": 0.006411,
+          "samples_ns": [
+            4138342192,
+            4138656937,
+            4137039508
+          ],
+          "samples_ts": [
+            30.9303,
+            30.9279,
+            30.94
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 28
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:49:07.449694+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:48:09Z\",\n    \"avg_ns\": 1704012904,\n    \"stddev_ns\": 222859,\n    \"avg_ts\": 75.116803,\n    \"stddev_ts\": 0.009825,\n    \"samples_ns\": [ 1704199839, 1704072596, 1703766277 ],\n    \"samples_ts\": [ 75.1086, 75.1142, 75.1277 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:48:15Z\",\n    \"avg_ns\": 17133240267,\n    \"stddev_ns\": 13711311,\n    \"avg_ts\": 29.883444,\n    \"stddev_ts\": 0.023903,\n    \"samples_ns\": [ 17148969592, 17126932589, 17123818621 ],\n    \"samples_ts\": [ 29.856, 29.8944, 29.8999 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:48:09Z",
+          "avg_ns": 1704012904,
+          "stddev_ns": 222859,
+          "avg_ts": 75.116803,
+          "stddev_ts": 0.009825,
+          "samples_ns": [
+            1704199839,
+            1704072596,
+            1703766277
+          ],
+          "samples_ts": [
+            75.1086,
+            75.1142,
+            75.1277
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:48:15Z",
+          "avg_ns": 17133240267,
+          "stddev_ns": 13711311,
+          "avg_ts": 29.883444,
+          "stddev_ts": 0.023903,
+          "samples_ns": [
+            17148969592,
+            17126932589,
+            17123818621
+          ],
+          "samples_ts": [
+            29.856,
+            29.8944,
+            29.8999
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 29
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:49:48.701162+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:49:08Z\",\n    \"avg_ns\": 7034202421,\n    \"stddev_ns\": 533949,\n    \"avg_ts\": 72.787215,\n    \"stddev_ts\": 0.005456,\n    \"samples_ns\": [ 7034171409, 7034744565, 7033691290 ],\n    \"samples_ts\": [ 72.7875, 72.7816, 72.7925 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:49:36Z\",\n    \"avg_ns\": 4107693634,\n    \"stddev_ns\": 354397,\n    \"avg_ts\": 31.161039,\n    \"stddev_ts\": 0.002599,\n    \"samples_ns\": [ 4107803352, 4107967947, 4107309605 ],\n    \"samples_ts\": [ 31.1602, 31.159, 31.164 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:49:08Z",
+          "avg_ns": 7034202421,
+          "stddev_ns": 533949,
+          "avg_ts": 72.787215,
+          "stddev_ts": 0.005456,
+          "samples_ns": [
+            7034171409,
+            7034744565,
+            7033691290
+          ],
+          "samples_ts": [
+            72.7875,
+            72.7816,
+            72.7925
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:49:36Z",
+          "avg_ns": 4107693634,
+          "stddev_ns": 354397,
+          "avg_ts": 31.161039,
+          "stddev_ts": 0.002599,
+          "samples_ns": [
+            4107803352,
+            4107967947,
+            4107309605
+          ],
+          "samples_ts": [
+            31.1602,
+            31.159,
+            31.164
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 30
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:51:08.757076+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:49:49Z\",\n    \"avg_ns\": 7032072003,\n    \"stddev_ns\": 769140,\n    \"avg_ts\": 72.809266,\n    \"stddev_ts\": 0.007868,\n    \"samples_ns\": [ 7031336204, 7032853983, 7032025824 ],\n    \"samples_ts\": [ 72.8169, 72.8012, 72.8097 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:50:17Z\",\n    \"avg_ns\": 17034894423,\n    \"stddev_ns\": 42890901,\n    \"avg_ts\": 30.056081,\n    \"stddev_ts\": 0.075785,\n    \"samples_ns\": [ 16985391191, 17060944066, 17058348013 ],\n    \"samples_ts\": [ 30.1436, 30.0101, 30.0146 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:49:49Z",
+          "avg_ns": 7032072003,
+          "stddev_ns": 769140,
+          "avg_ts": 72.809266,
+          "stddev_ts": 0.007868,
+          "samples_ns": [
+            7031336204,
+            7032853983,
+            7032025824
+          ],
+          "samples_ts": [
+            72.8169,
+            72.8012,
+            72.8097
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:50:17Z",
+          "avg_ns": 17034894423,
+          "stddev_ns": 42890901,
+          "avg_ts": 30.056081,
+          "stddev_ts": 0.075785,
+          "samples_ns": [
+            16985391191,
+            17060944066,
+            17058348013
+          ],
+          "samples_ts": [
+            30.1436,
+            30.0101,
+            30.0146
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 31
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:51:28.756444+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:51:09Z\",\n    \"avg_ns\": 1702700787,\n    \"stddev_ns\": 167271,\n    \"avg_ts\": 75.174688,\n    \"stddev_ts\": 0.007157,\n    \"samples_ns\": [ 1702518834, 1702829801, 1702753727 ],\n    \"samples_ts\": [ 75.1827, 75.169, 75.1724 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:51:16Z\",\n    \"avg_ns\": 4133581855,\n    \"stddev_ns\": 749758,\n    \"avg_ts\": 30.965881,\n    \"stddev_ts\": 0.005575,\n    \"samples_ns\": [ 4134427324, 4133025861, 4133292382 ],\n    \"samples_ts\": [ 30.9595, 30.97, 30.968 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:51:09Z",
+          "avg_ns": 1702700787,
+          "stddev_ns": 167271,
+          "avg_ts": 75.174688,
+          "stddev_ts": 0.007157,
+          "samples_ns": [
+            1702518834,
+            1702829801,
+            1702753727
+          ],
+          "samples_ts": [
+            75.1827,
+            75.169,
+            75.1724
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:51:16Z",
+          "avg_ns": 4133581855,
+          "stddev_ns": 749758,
+          "avg_ts": 30.965881,
+          "stddev_ts": 0.005575,
+          "samples_ns": [
+            4134427324,
+            4133025861,
+            4133292382
+          ],
+          "samples_ts": [
+            30.9595,
+            30.97,
+            30.968
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 32
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:52:27.326517+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:51:29Z\",\n    \"avg_ns\": 1705490035,\n    \"stddev_ns\": 172896,\n    \"avg_ts\": 75.051744,\n    \"stddev_ts\": 0.007609,\n    \"samples_ns\": [ 1705303465, 1705521782, 1705644858 ],\n    \"samples_ts\": [ 75.06, 75.0503, 75.0449 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:51:36Z\",\n    \"avg_ns\": 16988574438,\n    \"stddev_ns\": 31374232,\n    \"avg_ts\": 30.137971,\n    \"stddev_ts\": 0.055599,\n    \"samples_ns\": [ 17024769410, 16969147101, 16971806804 ],\n    \"samples_ts\": [ 30.0738, 30.1724, 30.1677 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:51:29Z",
+          "avg_ns": 1705490035,
+          "stddev_ns": 172896,
+          "avg_ts": 75.051744,
+          "stddev_ts": 0.007609,
+          "samples_ns": [
+            1705303465,
+            1705521782,
+            1705644858
+          ],
+          "samples_ts": [
+            75.06,
+            75.0503,
+            75.0449
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:51:36Z",
+          "avg_ns": 16988574438,
+          "stddev_ns": 31374232,
+          "avg_ts": 30.137971,
+          "stddev_ts": 0.055599,
+          "samples_ns": [
+            17024769410,
+            16969147101,
+            16971806804
+          ],
+          "samples_ts": [
+            30.0738,
+            30.1724,
+            30.1677
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 33
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:53:09.728083+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:52:28Z\",\n    \"avg_ns\": 7289896168,\n    \"stddev_ns\": 790150,\n    \"avg_ts\": 70.234197,\n    \"stddev_ts\": 0.007613,\n    \"samples_ns\": [ 7289021730, 7290107875, 7290558899 ],\n    \"samples_ts\": [ 70.2426, 70.2322, 70.2278 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:52:57Z\",\n    \"avg_ns\": 4138514070,\n    \"stddev_ns\": 841216,\n    \"avg_ts\": 30.928976,\n    \"stddev_ts\": 0.006250,\n    \"samples_ns\": [ 4138681400, 4139254038, 4137606774 ],\n    \"samples_ts\": [ 30.9277, 30.9234, 30.9358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:52:28Z",
+          "avg_ns": 7289896168,
+          "stddev_ns": 790150,
+          "avg_ts": 70.234197,
+          "stddev_ts": 0.007613,
+          "samples_ns": [
+            7289021730,
+            7290107875,
+            7290558899
+          ],
+          "samples_ts": [
+            70.2426,
+            70.2322,
+            70.2278
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:52:57Z",
+          "avg_ns": 4138514070,
+          "stddev_ns": 841216,
+          "avg_ts": 30.928976,
+          "stddev_ts": 0.00625,
+          "samples_ns": [
+            4138681400,
+            4139254038,
+            4137606774
+          ],
+          "samples_ts": [
+            30.9277,
+            30.9234,
+            30.9358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 34
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:54:30.230477+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:53:10Z\",\n    \"avg_ns\": 7268279264,\n    \"stddev_ns\": 787263,\n    \"avg_ts\": 70.443084,\n    \"stddev_ts\": 0.007630,\n    \"samples_ns\": [ 7268625935, 7268833698, 7267378159 ],\n    \"samples_ts\": [ 70.4397, 70.4377, 70.4518 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:53:39Z\",\n    \"avg_ns\": 16881600385,\n    \"stddev_ns\": 2347077,\n    \"avg_ts\": 30.328878,\n    \"stddev_ts\": 0.004217,\n    \"samples_ns\": [ 16884055079, 16881367839, 16879378237 ],\n    \"samples_ts\": [ 30.3245, 30.3293, 30.3329 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:53:10Z",
+          "avg_ns": 7268279264,
+          "stddev_ns": 787263,
+          "avg_ts": 70.443084,
+          "stddev_ts": 0.00763,
+          "samples_ns": [
+            7268625935,
+            7268833698,
+            7267378159
+          ],
+          "samples_ts": [
+            70.4397,
+            70.4377,
+            70.4518
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:53:39Z",
+          "avg_ns": 16881600385,
+          "stddev_ns": 2347077,
+          "avg_ts": 30.328878,
+          "stddev_ts": 0.004217,
+          "samples_ns": [
+            16884055079,
+            16881367839,
+            16879378237
+          ],
+          "samples_ts": [
+            30.3245,
+            30.3293,
+            30.3329
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 35
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:54:43.821146+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:54:30Z\",\n    \"avg_ns\": 872073402,\n    \"stddev_ns\": 105546,\n    \"avg_ts\": 146.776637,\n    \"stddev_ts\": 0.016314,\n    \"samples_ns\": [ 871992846, 872180978, 872046384 ],\n    \"samples_ts\": [ 146.79, 146.759, 146.781 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:54:34Z\",\n    \"avg_ns\": 3109714224,\n    \"stddev_ns\": 2210701,\n    \"avg_ts\": 41.161352,\n    \"stddev_ts\": 0.029255,\n    \"samples_ns\": [ 3107418545, 3109896931, 3111827197 ],\n    \"samples_ts\": [ 41.1917, 41.1589, 41.1334 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:54:30Z",
+          "avg_ns": 872073402,
+          "stddev_ns": 105546,
+          "avg_ts": 146.776637,
+          "stddev_ts": 0.016314,
+          "samples_ns": [
+            871992846,
+            872180978,
+            872046384
+          ],
+          "samples_ts": [
+            146.79,
+            146.759,
+            146.781
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:54:34Z",
+          "avg_ns": 3109714224,
+          "stddev_ns": 2210701,
+          "avg_ts": 41.161352,
+          "stddev_ts": 0.029255,
+          "samples_ns": [
+            3107418545,
+            3109896931,
+            3111827197
+          ],
+          "samples_ts": [
+            41.1917,
+            41.1589,
+            41.1334
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 36
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:55:26.252325+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:54:44Z\",\n    \"avg_ns\": 872033111,\n    \"stddev_ns\": 77389,\n    \"avg_ts\": 146.783418,\n    \"stddev_ts\": 0.010967,\n    \"samples_ns\": [ 872108292, 871997975, 871993068 ],\n    \"samples_ts\": [ 146.771, 146.789, 146.79 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:54:47Z\",\n    \"avg_ns\": 12725229368,\n    \"stddev_ns\": 9461001,\n    \"avg_ts\": 40.235046,\n    \"stddev_ts\": 0.029915,\n    \"samples_ns\": [ 12715145269, 12733907116, 12726635721 ],\n    \"samples_ts\": [ 40.2669, 40.2076, 40.2306 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:54:44Z",
+          "avg_ns": 872033111,
+          "stddev_ns": 77389,
+          "avg_ts": 146.783418,
+          "stddev_ts": 0.010967,
+          "samples_ns": [
+            872108292,
+            871997975,
+            871993068
+          ],
+          "samples_ts": [
+            146.771,
+            146.789,
+            146.79
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:54:47Z",
+          "avg_ns": 12725229368,
+          "stddev_ns": 9461001,
+          "avg_ts": 40.235046,
+          "stddev_ts": 0.029915,
+          "samples_ns": [
+            12715145269,
+            12733907116,
+            12726635721
+          ],
+          "samples_ts": [
+            40.2669,
+            40.2076,
+            40.2306
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 37
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:55:50.633391+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:55:26Z\",\n    \"avg_ns\": 3572203858,\n    \"stddev_ns\": 418612,\n    \"avg_ts\": 143.328887,\n    \"stddev_ts\": 0.016795,\n    \"samples_ns\": [ 3571932873, 3572685994, 3571992707 ],\n    \"samples_ts\": [ 143.34, 143.31, 143.337 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:55:41Z\",\n    \"avg_ns\": 3106605614,\n    \"stddev_ns\": 769831,\n    \"avg_ts\": 41.202528,\n    \"stddev_ts\": 0.010184,\n    \"samples_ns\": [ 3107358655, 3105823839, 3106634349 ],\n    \"samples_ts\": [ 41.1925, 41.2129, 41.2021 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:55:26Z",
+          "avg_ns": 3572203858,
+          "stddev_ns": 418612,
+          "avg_ts": 143.328887,
+          "stddev_ts": 0.016795,
+          "samples_ns": [
+            3571932873,
+            3572685994,
+            3571992707
+          ],
+          "samples_ts": [
+            143.34,
+            143.31,
+            143.337
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:55:41Z",
+          "avg_ns": 3106605614,
+          "stddev_ns": 769831,
+          "avg_ts": 41.202528,
+          "stddev_ts": 0.010184,
+          "samples_ns": [
+            3107358655,
+            3105823839,
+            3106634349
+          ],
+          "samples_ts": [
+            41.1925,
+            41.2129,
+            41.2021
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 38
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:56:43.664416+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:55:51Z\",\n    \"avg_ns\": 3571229011,\n    \"stddev_ns\": 178757,\n    \"avg_ts\": 143.368011,\n    \"stddev_ts\": 0.007176,\n    \"samples_ns\": [ 3571435384, 3571122380, 3571129269 ],\n    \"samples_ts\": [ 143.36, 143.372, 143.372 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:56:05Z\",\n    \"avg_ns\": 12650059560,\n    \"stddev_ns\": 1774588,\n    \"avg_ts\": 40.474118,\n    \"stddev_ts\": 0.005666,\n    \"samples_ns\": [ 12649641900, 12648534700, 12652002081 ],\n    \"samples_ts\": [ 40.4755, 40.479, 40.4679 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:55:51Z",
+          "avg_ns": 3571229011,
+          "stddev_ns": 178757,
+          "avg_ts": 143.368011,
+          "stddev_ts": 0.007176,
+          "samples_ns": [
+            3571435384,
+            3571122380,
+            3571129269
+          ],
+          "samples_ts": [
+            143.36,
+            143.372,
+            143.372
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:56:05Z",
+          "avg_ns": 12650059560,
+          "stddev_ns": 1774588,
+          "avg_ts": 40.474118,
+          "stddev_ts": 0.005666,
+          "samples_ns": [
+            12649641900,
+            12648534700,
+            12652002081
+          ],
+          "samples_ts": [
+            40.4755,
+            40.479,
+            40.4679
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 39
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:56:57.261774+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:56:44Z\",\n    \"avg_ns\": 874367434,\n    \"stddev_ns\": 159000,\n    \"avg_ts\": 146.391549,\n    \"stddev_ts\": 0.026622,\n    \"samples_ns\": [ 874510843, 874395009, 874196450 ],\n    \"samples_ts\": [ 146.368, 146.387, 146.42 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:56:47Z\",\n    \"avg_ns\": 3108093499,\n    \"stddev_ns\": 712088,\n    \"avg_ts\": 41.182804,\n    \"stddev_ts\": 0.009406,\n    \"samples_ns\": [ 3107405842, 3108050925, 3108823731 ],\n    \"samples_ts\": [ 41.1919, 41.1834, 41.1731 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:56:44Z",
+          "avg_ns": 874367434,
+          "stddev_ns": 159000,
+          "avg_ts": 146.391549,
+          "stddev_ts": 0.026622,
+          "samples_ns": [
+            874510843,
+            874395009,
+            874196450
+          ],
+          "samples_ts": [
+            146.368,
+            146.387,
+            146.42
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:56:47Z",
+          "avg_ns": 3108093499,
+          "stddev_ns": 712088,
+          "avg_ts": 41.182804,
+          "stddev_ts": 0.009406,
+          "samples_ns": [
+            3107405842,
+            3108050925,
+            3108823731
+          ],
+          "samples_ts": [
+            41.1919,
+            41.1834,
+            41.1731
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 40
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:57:39.600060+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:56:57Z\",\n    \"avg_ns\": 872626446,\n    \"stddev_ns\": 43030,\n    \"avg_ts\": 146.683613,\n    \"stddev_ts\": 0.001734,\n    \"samples_ns\": [ 872634354, 872614780, 872630206 ],\n    \"samples_ts\": [ 146.682, 146.686, 146.683 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:57:01Z\",\n    \"avg_ns\": 12689302629,\n    \"stddev_ns\": 8272322,\n    \"avg_ts\": 40.348958,\n    \"stddev_ts\": 0.026300,\n    \"samples_ns\": [ 12681361312, 12697868964, 12688677612 ],\n    \"samples_ts\": [ 40.3742, 40.3217, 40.3509 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:56:57Z",
+          "avg_ns": 872626446,
+          "stddev_ns": 43030,
+          "avg_ts": 146.683613,
+          "stddev_ts": 0.001734,
+          "samples_ns": [
+            872634354,
+            872614780,
+            872630206
+          ],
+          "samples_ts": [
+            146.682,
+            146.686,
+            146.683
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:57:01Z",
+          "avg_ns": 12689302629,
+          "stddev_ns": 8272322,
+          "avg_ts": 40.348958,
+          "stddev_ts": 0.0263,
+          "samples_ns": [
+            12681361312,
+            12697868964,
+            12688677612
+          ],
+          "samples_ts": [
+            40.3742,
+            40.3217,
+            40.3509
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 41
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:58:04.080599+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:57:40Z\",\n    \"avg_ns\": 3600014972,\n    \"stddev_ns\": 388755,\n    \"avg_ts\": 142.221632,\n    \"stddev_ts\": 0.015174,\n    \"samples_ns\": [ 3600002936, 3600404946, 3599637035 ],\n    \"samples_ts\": [ 142.222, 142.206, 142.237 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:57:54Z\",\n    \"avg_ns\": 3102364313,\n    \"stddev_ns\": 498456,\n    \"avg_ts\": 41.258856,\n    \"stddev_ts\": 0.006630,\n    \"samples_ns\": [ 3102562408, 3102733269, 3101797262 ],\n    \"samples_ts\": [ 41.2562, 41.2539, 41.2664 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:57:40Z",
+          "avg_ns": 3600014972,
+          "stddev_ns": 388755,
+          "avg_ts": 142.221632,
+          "stddev_ts": 0.015174,
+          "samples_ns": [
+            3600002936,
+            3600404946,
+            3599637035
+          ],
+          "samples_ts": [
+            142.222,
+            142.206,
+            142.237
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:57:54Z",
+          "avg_ns": 3102364313,
+          "stddev_ns": 498456,
+          "avg_ts": 41.258856,
+          "stddev_ts": 0.00663,
+          "samples_ns": [
+            3102562408,
+            3102733269,
+            3101797262
+          ],
+          "samples_ts": [
+            41.2562,
+            41.2539,
+            41.2664
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 42
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:58:57.448581+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:58:04Z\",\n    \"avg_ns\": 3599569232,\n    \"stddev_ns\": 472670,\n    \"avg_ts\": 142.239244,\n    \"stddev_ts\": 0.018528,\n    \"samples_ns\": [ 3599057454, 3599978028, 3599672215 ],\n    \"samples_ts\": [ 142.259, 142.223, 142.235 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:58:19Z\",\n    \"avg_ns\": 12730146253,\n    \"stddev_ns\": 5849413,\n    \"avg_ts\": 40.219496,\n    \"stddev_ts\": 0.018478,\n    \"samples_ns\": [ 12723435885, 12732854607, 12734148269 ],\n    \"samples_ts\": [ 40.2407, 40.2109, 40.2069 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:58:04Z",
+          "avg_ns": 3599569232,
+          "stddev_ns": 472670,
+          "avg_ts": 142.239244,
+          "stddev_ts": 0.018528,
+          "samples_ns": [
+            3599057454,
+            3599978028,
+            3599672215
+          ],
+          "samples_ts": [
+            142.259,
+            142.223,
+            142.235
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:58:19Z",
+          "avg_ns": 12730146253,
+          "stddev_ns": 5849413,
+          "avg_ts": 40.219496,
+          "stddev_ts": 0.018478,
+          "samples_ns": [
+            12723435885,
+            12732854607,
+            12734148269
+          ],
+          "samples_ts": [
+            40.2407,
+            40.2109,
+            40.2069
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 43
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:59:11.020082+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:58:58Z\",\n    \"avg_ns\": 872306871,\n    \"stddev_ns\": 119174,\n    \"avg_ts\": 146.737353,\n    \"stddev_ts\": 0.019423,\n    \"samples_ns\": [ 872389182, 872174892, 872356540 ],\n    \"samples_ts\": [ 146.724, 146.76, 146.729 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:59:01Z\",\n    \"avg_ns\": 3095021122,\n    \"stddev_ns\": 507182,\n    \"avg_ts\": 41.356746,\n    \"stddev_ts\": 0.006777,\n    \"samples_ns\": [ 3095604939, 3094769245, 3094689182 ],\n    \"samples_ts\": [ 41.3489, 41.3601, 41.3612 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:58:58Z",
+          "avg_ns": 872306871,
+          "stddev_ns": 119174,
+          "avg_ts": 146.737353,
+          "stddev_ts": 0.019423,
+          "samples_ns": [
+            872389182,
+            872174892,
+            872356540
+          ],
+          "samples_ts": [
+            146.724,
+            146.76,
+            146.729
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:59:01Z",
+          "avg_ns": 3095021122,
+          "stddev_ns": 507182,
+          "avg_ts": 41.356746,
+          "stddev_ts": 0.006777,
+          "samples_ns": [
+            3095604939,
+            3094769245,
+            3094689182
+          ],
+          "samples_ts": [
+            41.3489,
+            41.3601,
+            41.3612
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 44
+    },
+    {
+      "timestamp_utc": "2025-12-08T20:59:53.327708+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:59:11Z\",\n    \"avg_ns\": 872392780,\n    \"stddev_ns\": 57059,\n    \"avg_ts\": 146.722902,\n    \"stddev_ts\": 0.008211,\n    \"samples_ns\": [ 872340884, 872437792, 872399665 ],\n    \"samples_ts\": [ 146.732, 146.715, 146.722 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:59:15Z\",\n    \"avg_ns\": 12681116834,\n    \"stddev_ns\": 1653963,\n    \"avg_ts\": 40.374993,\n    \"stddev_ts\": 0.005266,\n    \"samples_ns\": [ 12679337322, 12682607088, 12681406092 ],\n    \"samples_ts\": [ 40.3807, 40.3702, 40.3741 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:59:11Z",
+          "avg_ns": 872392780,
+          "stddev_ns": 57059,
+          "avg_ts": 146.722902,
+          "stddev_ts": 0.008211,
+          "samples_ns": [
+            872340884,
+            872437792,
+            872399665
+          ],
+          "samples_ts": [
+            146.732,
+            146.715,
+            146.722
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:59:15Z",
+          "avg_ns": 12681116834,
+          "stddev_ns": 1653963,
+          "avg_ts": 40.374993,
+          "stddev_ts": 0.005266,
+          "samples_ns": [
+            12679337322,
+            12682607088,
+            12681406092
+          ],
+          "samples_ts": [
+            40.3807,
+            40.3702,
+            40.3741
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 45
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:00:18.396021+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T20:59:53Z\",\n    \"avg_ns\": 3736275800,\n    \"stddev_ns\": 1155368,\n    \"avg_ts\": 137.034860,\n    \"stddev_ts\": 0.042375,\n    \"samples_ns\": [ 3737442573, 3735132184, 3736252643 ],\n    \"samples_ts\": [ 136.992, 137.077, 137.036 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:00:08Z\",\n    \"avg_ns\": 3113956767,\n    \"stddev_ns\": 1462826,\n    \"avg_ts\": 41.105265,\n    \"stddev_ts\": 0.019314,\n    \"samples_ns\": [ 3115086468, 3114479433, 3112304400 ],\n    \"samples_ts\": [ 41.0904, 41.0984, 41.1271 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T20:59:53Z",
+          "avg_ns": 3736275800,
+          "stddev_ns": 1155368,
+          "avg_ts": 137.03486,
+          "stddev_ts": 0.042375,
+          "samples_ns": [
+            3737442573,
+            3735132184,
+            3736252643
+          ],
+          "samples_ts": [
+            136.992,
+            137.077,
+            137.036
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:00:08Z",
+          "avg_ns": 3113956767,
+          "stddev_ns": 1462826,
+          "avg_ts": 41.105265,
+          "stddev_ts": 0.019314,
+          "samples_ns": [
+            3115086468,
+            3114479433,
+            3112304400
+          ],
+          "samples_ts": [
+            41.0904,
+            41.0984,
+            41.1271
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 46
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:01:12.125056+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:00:19Z\",\n    \"avg_ns\": 3733892814,\n    \"stddev_ns\": 369001,\n    \"avg_ts\": 137.122309,\n    \"stddev_ts\": 0.013551,\n    \"samples_ns\": [ 3733924281, 3733509087, 3734245074 ],\n    \"samples_ts\": [ 137.121, 137.136, 137.109 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:00:34Z\",\n    \"avg_ns\": 12671400170,\n    \"stddev_ns\": 8811570,\n    \"avg_ts\": 40.405966,\n    \"stddev_ts\": 0.028093,\n    \"samples_ns\": [ 12680915481, 12663521952, 12669763077 ],\n    \"samples_ts\": [ 40.3756, 40.4311, 40.4112 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:00:19Z",
+          "avg_ns": 3733892814,
+          "stddev_ns": 369001,
+          "avg_ts": 137.122309,
+          "stddev_ts": 0.013551,
+          "samples_ns": [
+            3733924281,
+            3733509087,
+            3734245074
+          ],
+          "samples_ts": [
+            137.121,
+            137.136,
+            137.109
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:00:34Z",
+          "avg_ns": 12671400170,
+          "stddev_ns": 8811570,
+          "avg_ts": 40.405966,
+          "stddev_ts": 0.028093,
+          "samples_ns": [
+            12680915481,
+            12663521952,
+            12669763077
+          ],
+          "samples_ts": [
+            40.3756,
+            40.4311,
+            40.4112
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 47
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:01:25.786891+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:01:12Z\",\n    \"avg_ns\": 872791340,\n    \"stddev_ns\": 85218,\n    \"avg_ts\": 146.655901,\n    \"stddev_ts\": 0.012480,\n    \"samples_ns\": [ 872732872, 872766236, 872874914 ],\n    \"samples_ts\": [ 146.666, 146.66, 146.642 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:01:16Z\",\n    \"avg_ns\": 3117834535,\n    \"stddev_ns\": 5003343,\n    \"avg_ts\": 41.054205,\n    \"stddev_ts\": 0.065845,\n    \"samples_ns\": [ 3113556648, 3116610711, 3123336246 ],\n    \"samples_ts\": [ 41.1105, 41.0703, 40.9818 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:01:12Z",
+          "avg_ns": 872791340,
+          "stddev_ns": 85218,
+          "avg_ts": 146.655901,
+          "stddev_ts": 0.01248,
+          "samples_ns": [
+            872732872,
+            872766236,
+            872874914
+          ],
+          "samples_ts": [
+            146.666,
+            146.66,
+            146.642
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:01:16Z",
+          "avg_ns": 3117834535,
+          "stddev_ns": 5003343,
+          "avg_ts": 41.054205,
+          "stddev_ts": 0.065845,
+          "samples_ns": [
+            3113556648,
+            3116610711,
+            3123336246
+          ],
+          "samples_ts": [
+            41.1105,
+            41.0703,
+            40.9818
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 48
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:02:08.095387+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:01:26Z\",\n    \"avg_ns\": 872058144,\n    \"stddev_ns\": 130829,\n    \"avg_ts\": 146.779206,\n    \"stddev_ts\": 0.020869,\n    \"samples_ns\": [ 872176144, 871928936, 872069354 ],\n    \"samples_ts\": [ 146.759, 146.801, 146.777 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:01:29Z\",\n    \"avg_ns\": 12673382412,\n    \"stddev_ns\": 1889673,\n    \"avg_ts\": 40.399634,\n    \"stddev_ts\": 0.006024,\n    \"samples_ns\": [ 12675400760, 12671655206, 12673091270 ],\n    \"samples_ts\": [ 40.3932, 40.4051, 40.4006 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:01:26Z",
+          "avg_ns": 872058144,
+          "stddev_ns": 130829,
+          "avg_ts": 146.779206,
+          "stddev_ts": 0.020869,
+          "samples_ns": [
+            872176144,
+            871928936,
+            872069354
+          ],
+          "samples_ts": [
+            146.759,
+            146.801,
+            146.777
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:01:29Z",
+          "avg_ns": 12673382412,
+          "stddev_ns": 1889673,
+          "avg_ts": 40.399634,
+          "stddev_ts": 0.006024,
+          "samples_ns": [
+            12675400760,
+            12671655206,
+            12673091270
+          ],
+          "samples_ts": [
+            40.3932,
+            40.4051,
+            40.4006
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 49
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:02:32.533824+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:02:08Z\",\n    \"avg_ns\": 3572540263,\n    \"stddev_ns\": 381415,\n    \"avg_ts\": 143.315391,\n    \"stddev_ts\": 0.015300,\n    \"samples_ns\": [ 3572468658, 3572952406, 3572199725 ],\n    \"samples_ts\": [ 143.318, 143.299, 143.329 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:02:23Z\",\n    \"avg_ns\": 3124251241,\n    \"stddev_ns\": 1205047,\n    \"avg_ts\": 40.969821,\n    \"stddev_ts\": 0.015799,\n    \"samples_ns\": [ 3123393750, 3125629024, 3123730949 ],\n    \"samples_ts\": [ 40.9811, 40.9518, 40.9766 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:02:08Z",
+          "avg_ns": 3572540263,
+          "stddev_ns": 381415,
+          "avg_ts": 143.315391,
+          "stddev_ts": 0.0153,
+          "samples_ns": [
+            3572468658,
+            3572952406,
+            3572199725
+          ],
+          "samples_ts": [
+            143.318,
+            143.299,
+            143.329
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:02:23Z",
+          "avg_ns": 3124251241,
+          "stddev_ns": 1205047,
+          "avg_ts": 40.969821,
+          "stddev_ts": 0.015799,
+          "samples_ns": [
+            3123393750,
+            3125629024,
+            3123730949
+          ],
+          "samples_ts": [
+            40.9811,
+            40.9518,
+            40.9766
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 50
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:03:25.669177+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:02:33Z\",\n    \"avg_ns\": 3572171258,\n    \"stddev_ns\": 185473,\n    \"avg_ts\": 143.330194,\n    \"stddev_ts\": 0.007442,\n    \"samples_ns\": [ 3571980172, 3572350556, 3572183046 ],\n    \"samples_ts\": [ 143.338, 143.323, 143.33 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:02:47Z\",\n    \"avg_ns\": 12682023970,\n    \"stddev_ns\": 2152672,\n    \"avg_ts\": 40.372106,\n    \"stddev_ts\": 0.006833,\n    \"samples_ns\": [ 12684487096, 12681034060, 12680550756 ],\n    \"samples_ts\": [ 40.3643, 40.3753, 40.3768 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:02:33Z",
+          "avg_ns": 3572171258,
+          "stddev_ns": 185473,
+          "avg_ts": 143.330194,
+          "stddev_ts": 0.007442,
+          "samples_ns": [
+            3571980172,
+            3572350556,
+            3572183046
+          ],
+          "samples_ts": [
+            143.338,
+            143.323,
+            143.33
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:02:47Z",
+          "avg_ns": 12682023970,
+          "stddev_ns": 2152672,
+          "avg_ts": 40.372106,
+          "stddev_ts": 0.006833,
+          "samples_ns": [
+            12684487096,
+            12681034060,
+            12680550756
+          ],
+          "samples_ts": [
+            40.3643,
+            40.3753,
+            40.3768
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 51
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:03:39.279602+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:03:26Z\",\n    \"avg_ns\": 872801027,\n    \"stddev_ns\": 86626,\n    \"avg_ts\": 146.654274,\n    \"stddev_ts\": 0.013683,\n    \"samples_ns\": [ 872811465, 872714879, 872876738 ],\n    \"samples_ts\": [ 146.653, 146.669, 146.642 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:03:29Z\",\n    \"avg_ns\": 3114050561,\n    \"stddev_ns\": 1241419,\n    \"avg_ts\": 41.104025,\n    \"stddev_ts\": 0.016383,\n    \"samples_ns\": [ 3113039557, 3113675991, 3115436135 ],\n    \"samples_ts\": [ 41.1174, 41.109, 41.0857 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:03:26Z",
+          "avg_ns": 872801027,
+          "stddev_ns": 86626,
+          "avg_ts": 146.654274,
+          "stddev_ts": 0.013683,
+          "samples_ns": [
+            872811465,
+            872714879,
+            872876738
+          ],
+          "samples_ts": [
+            146.653,
+            146.669,
+            146.642
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:03:29Z",
+          "avg_ns": 3114050561,
+          "stddev_ns": 1241419,
+          "avg_ts": 41.104025,
+          "stddev_ts": 0.016383,
+          "samples_ns": [
+            3113039557,
+            3113675991,
+            3115436135
+          ],
+          "samples_ts": [
+            41.1174,
+            41.109,
+            41.0857
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 52
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:04:21.847213+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:03:39Z\",\n    \"avg_ns\": 872204703,\n    \"stddev_ns\": 110053,\n    \"avg_ts\": 146.754541,\n    \"stddev_ts\": 0.017838,\n    \"samples_ns\": [ 872105662, 872316536, 872191912 ],\n    \"samples_ts\": [ 146.771, 146.736, 146.757 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:03:43Z\",\n    \"avg_ns\": 12764831120,\n    \"stddev_ns\": 6122645,\n    \"avg_ts\": 40.110212,\n    \"stddev_ts\": 0.019236,\n    \"samples_ns\": [ 12771584629, 12759643462, 12763265269 ],\n    \"samples_ts\": [ 40.089, 40.1265, 40.1151 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:03:39Z",
+          "avg_ns": 872204703,
+          "stddev_ns": 110053,
+          "avg_ts": 146.754541,
+          "stddev_ts": 0.017838,
+          "samples_ns": [
+            872105662,
+            872316536,
+            872191912
+          ],
+          "samples_ts": [
+            146.771,
+            146.736,
+            146.757
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:03:43Z",
+          "avg_ns": 12764831120,
+          "stddev_ns": 6122645,
+          "avg_ts": 40.110212,
+          "stddev_ts": 0.019236,
+          "samples_ns": [
+            12771584629,
+            12759643462,
+            12763265269
+          ],
+          "samples_ts": [
+            40.089,
+            40.1265,
+            40.1151
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 53
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:04:46.444987+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:04:22Z\",\n    \"avg_ns\": 3620284598,\n    \"stddev_ns\": 328816,\n    \"avg_ts\": 141.425346,\n    \"stddev_ts\": 0.012629,\n    \"samples_ns\": [ 3619922391, 3620543834, 3620387570 ],\n    \"samples_ts\": [ 141.439, 141.415, 141.421 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:04:37Z\",\n    \"avg_ns\": 3104391842,\n    \"stddev_ns\": 1833840,\n    \"avg_ts\": 41.231918,\n    \"stddev_ts\": 0.024349,\n    \"samples_ns\": [ 3103079015, 3103609390, 3106487121 ],\n    \"samples_ts\": [ 41.2494, 41.2423, 41.2041 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:04:22Z",
+          "avg_ns": 3620284598,
+          "stddev_ns": 328816,
+          "avg_ts": 141.425346,
+          "stddev_ts": 0.012629,
+          "samples_ns": [
+            3619922391,
+            3620543834,
+            3620387570
+          ],
+          "samples_ts": [
+            141.439,
+            141.415,
+            141.421
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:04:37Z",
+          "avg_ns": 3104391842,
+          "stddev_ns": 1833840,
+          "avg_ts": 41.231918,
+          "stddev_ts": 0.024349,
+          "samples_ns": [
+            3103079015,
+            3103609390,
+            3106487121
+          ],
+          "samples_ts": [
+            41.2494,
+            41.2423,
+            41.2041
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 54
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:05:39.806612+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:04:47Z\",\n    \"avg_ns\": 3602433437,\n    \"stddev_ns\": 1180086,\n    \"avg_ts\": 142.126162,\n    \"stddev_ts\": 0.046552,\n    \"samples_ns\": [ 3603749728, 3601470108, 3602080475 ],\n    \"samples_ts\": [ 142.074, 142.164, 142.14 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:05:01Z\",\n    \"avg_ns\": 12724268346,\n    \"stddev_ns\": 5720484,\n    \"avg_ts\": 40.238075,\n    \"stddev_ts\": 0.018091,\n    \"samples_ns\": [ 12727859790, 12717672899, 12727272350 ],\n    \"samples_ts\": [ 40.2267, 40.2589, 40.2286 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:04:47Z",
+          "avg_ns": 3602433437,
+          "stddev_ns": 1180086,
+          "avg_ts": 142.126162,
+          "stddev_ts": 0.046552,
+          "samples_ns": [
+            3603749728,
+            3601470108,
+            3602080475
+          ],
+          "samples_ts": [
+            142.074,
+            142.164,
+            142.14
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:05:01Z",
+          "avg_ns": 12724268346,
+          "stddev_ns": 5720484,
+          "avg_ts": 40.238075,
+          "stddev_ts": 0.018091,
+          "samples_ns": [
+            12727859790,
+            12717672899,
+            12727272350
+          ],
+          "samples_ts": [
+            40.2267,
+            40.2589,
+            40.2286
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 55
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:05:53.360058+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:05:40Z\",\n    \"avg_ns\": 872031450,\n    \"stddev_ns\": 76046,\n    \"avg_ts\": 146.783697,\n    \"stddev_ts\": 0.010698,\n    \"samples_ns\": [ 872055339, 872079598, 871959415 ],\n    \"samples_ts\": [ 146.78, 146.776, 146.796 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:05:43Z\",\n    \"avg_ns\": 3097169772,\n    \"stddev_ns\": 958321,\n    \"avg_ts\": 41.328057,\n    \"stddev_ts\": 0.012786,\n    \"samples_ns\": [ 3098257830, 3096451165, 3096800321 ],\n    \"samples_ts\": [ 41.3135, 41.3376, 41.333 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:05:40Z",
+          "avg_ns": 872031450,
+          "stddev_ns": 76046,
+          "avg_ts": 146.783697,
+          "stddev_ts": 0.010698,
+          "samples_ns": [
+            872055339,
+            872079598,
+            871959415
+          ],
+          "samples_ts": [
+            146.78,
+            146.776,
+            146.796
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:05:43Z",
+          "avg_ns": 3097169772,
+          "stddev_ns": 958321,
+          "avg_ts": 41.328057,
+          "stddev_ts": 0.012786,
+          "samples_ns": [
+            3098257830,
+            3096451165,
+            3096800321
+          ],
+          "samples_ts": [
+            41.3135,
+            41.3376,
+            41.333
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 56
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:06:35.839374+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:05:54Z\",\n    \"avg_ns\": 884664503,\n    \"stddev_ns\": 3447568,\n    \"avg_ts\": 144.689085,\n    \"stddev_ts\": 0.564588,\n    \"samples_ns\": [ 885520593, 887603103, 880869815 ],\n    \"samples_ts\": [ 144.548, 144.209, 145.311 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:05:57Z\",\n    \"avg_ns\": 12721240700,\n    \"stddev_ns\": 6042335,\n    \"avg_ts\": 40.247653,\n    \"stddev_ts\": 0.019115,\n    \"samples_ns\": [ 12714940819, 12726985004, 12721796278 ],\n    \"samples_ts\": [ 40.2676, 40.2295, 40.2459 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:05:54Z",
+          "avg_ns": 884664503,
+          "stddev_ns": 3447568,
+          "avg_ts": 144.689085,
+          "stddev_ts": 0.564588,
+          "samples_ns": [
+            885520593,
+            887603103,
+            880869815
+          ],
+          "samples_ts": [
+            144.548,
+            144.209,
+            145.311
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:05:57Z",
+          "avg_ns": 12721240700,
+          "stddev_ns": 6042335,
+          "avg_ts": 40.247653,
+          "stddev_ts": 0.019115,
+          "samples_ns": [
+            12714940819,
+            12726985004,
+            12721796278
+          ],
+          "samples_ts": [
+            40.2676,
+            40.2295,
+            40.2459
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 57
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:07:00.906566+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:06:36Z\",\n    \"avg_ns\": 3736657334,\n    \"stddev_ns\": 2802475,\n    \"avg_ts\": 137.020911,\n    \"stddev_ts\": 0.102673,\n    \"samples_ns\": [ 3739881241, 3734819065, 3735271698 ],\n    \"samples_ts\": [ 136.903, 137.088, 137.072 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:06:51Z\",\n    \"avg_ns\": 3108939858,\n    \"stddev_ns\": 794205,\n    \"avg_ts\": 41.171593,\n    \"stddev_ts\": 0.010519,\n    \"samples_ns\": [ 3108041241, 3109547684, 3109230649 ],\n    \"samples_ts\": [ 41.1835, 41.1635, 41.1677 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:06:36Z",
+          "avg_ns": 3736657334,
+          "stddev_ns": 2802475,
+          "avg_ts": 137.020911,
+          "stddev_ts": 0.102673,
+          "samples_ns": [
+            3739881241,
+            3734819065,
+            3735271698
+          ],
+          "samples_ts": [
+            136.903,
+            137.088,
+            137.072
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:06:51Z",
+          "avg_ns": 3108939858,
+          "stddev_ns": 794205,
+          "avg_ts": 41.171593,
+          "stddev_ts": 0.010519,
+          "samples_ns": [
+            3108041241,
+            3109547684,
+            3109230649
+          ],
+          "samples_ts": [
+            41.1835,
+            41.1635,
+            41.1677
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 58
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:07:54.585535+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:07:01Z\",\n    \"avg_ns\": 3735990562,\n    \"stddev_ns\": 500280,\n    \"avg_ts\": 137.045316,\n    \"stddev_ts\": 0.018350,\n    \"samples_ns\": [ 3735618550, 3736559303, 3735793833 ],\n    \"samples_ts\": [ 137.059, 137.024, 137.053 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:07:16Z\",\n    \"avg_ns\": 12650061080,\n    \"stddev_ns\": 2816199,\n    \"avg_ts\": 40.474114,\n    \"stddev_ts\": 0.009010,\n    \"samples_ns\": [ 12650066627, 12652874502, 12647242111 ],\n    \"samples_ts\": [ 40.4741, 40.4651, 40.4831 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:07:01Z",
+          "avg_ns": 3735990562,
+          "stddev_ns": 500280,
+          "avg_ts": 137.045316,
+          "stddev_ts": 0.01835,
+          "samples_ns": [
+            3735618550,
+            3736559303,
+            3735793833
+          ],
+          "samples_ts": [
+            137.059,
+            137.024,
+            137.053
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:07:16Z",
+          "avg_ns": 12650061080,
+          "stddev_ns": 2816199,
+          "avg_ts": 40.474114,
+          "stddev_ts": 0.00901,
+          "samples_ns": [
+            12650066627,
+            12652874502,
+            12647242111
+          ],
+          "samples_ts": [
+            40.4741,
+            40.4651,
+            40.4831
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 59
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:08:08.182976+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:07:55Z\",\n    \"avg_ns\": 872312867,\n    \"stddev_ns\": 131254,\n    \"avg_ts\": 146.736345,\n    \"stddev_ts\": 0.020929,\n    \"samples_ns\": [ 872456494, 872237640, 872244469 ],\n    \"samples_ts\": [ 146.712, 146.749, 146.748 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:07:58Z\",\n    \"avg_ns\": 3111036868,\n    \"stddev_ns\": 1387441,\n    \"avg_ts\": 41.143844,\n    \"stddev_ts\": 0.018330,\n    \"samples_ns\": [ 3110390491, 3112628335, 3110091779 ],\n    \"samples_ts\": [ 41.1524, 41.1228, 41.1563 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:07:55Z",
+          "avg_ns": 872312867,
+          "stddev_ns": 131254,
+          "avg_ts": 146.736345,
+          "stddev_ts": 0.020929,
+          "samples_ns": [
+            872456494,
+            872237640,
+            872244469
+          ],
+          "samples_ts": [
+            146.712,
+            146.749,
+            146.748
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:07:58Z",
+          "avg_ns": 3111036868,
+          "stddev_ns": 1387441,
+          "avg_ts": 41.143844,
+          "stddev_ts": 0.01833,
+          "samples_ns": [
+            3110390491,
+            3112628335,
+            3110091779
+          ],
+          "samples_ts": [
+            41.1524,
+            41.1228,
+            41.1563
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 60
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:08:50.574556+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:08:08Z\",\n    \"avg_ns\": 875840104,\n    \"stddev_ns\": 144346,\n    \"avg_ts\": 146.145400,\n    \"stddev_ts\": 0.024088,\n    \"samples_ns\": [ 875936047, 875674098, 875910167 ],\n    \"samples_ts\": [ 146.129, 146.173, 146.134 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:08:12Z\",\n    \"avg_ns\": 12688162445,\n    \"stddev_ns\": 19177340,\n    \"avg_ts\": 40.352634,\n    \"stddev_ts\": 0.060950,\n    \"samples_ns\": [ 12709718196, 12672993373, 12681775766 ],\n    \"samples_ts\": [ 40.2841, 40.4009, 40.3729 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:08:08Z",
+          "avg_ns": 875840104,
+          "stddev_ns": 144346,
+          "avg_ts": 146.1454,
+          "stddev_ts": 0.024088,
+          "samples_ns": [
+            875936047,
+            875674098,
+            875910167
+          ],
+          "samples_ts": [
+            146.129,
+            146.173,
+            146.134
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:08:12Z",
+          "avg_ns": 12688162445,
+          "stddev_ns": 19177340,
+          "avg_ts": 40.352634,
+          "stddev_ts": 0.06095,
+          "samples_ns": [
+            12709718196,
+            12672993373,
+            12681775766
+          ],
+          "samples_ts": [
+            40.2841,
+            40.4009,
+            40.3729
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 61
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:09:14.962951+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:08:51Z\",\n    \"avg_ns\": 3571676174,\n    \"stddev_ns\": 311759,\n    \"avg_ts\": 143.350062,\n    \"stddev_ts\": 0.012280,\n    \"samples_ns\": [ 3572006356, 3571619976, 3571402191 ],\n    \"samples_ts\": [ 143.337, 143.352, 143.361 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:09:05Z\",\n    \"avg_ns\": 3108750307,\n    \"stddev_ns\": 635723,\n    \"avg_ts\": 41.174102,\n    \"stddev_ts\": 0.008420,\n    \"samples_ns\": [ 3109344204, 3108079715, 3108827002 ],\n    \"samples_ts\": [ 41.1662, 41.183, 41.1731 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:08:51Z",
+          "avg_ns": 3571676174,
+          "stddev_ns": 311759,
+          "avg_ts": 143.350062,
+          "stddev_ts": 0.01228,
+          "samples_ns": [
+            3572006356,
+            3571619976,
+            3571402191
+          ],
+          "samples_ts": [
+            143.337,
+            143.352,
+            143.361
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:09:05Z",
+          "avg_ns": 3108750307,
+          "stddev_ns": 635723,
+          "avg_ts": 41.174102,
+          "stddev_ts": 0.00842,
+          "samples_ns": [
+            3109344204,
+            3108079715,
+            3108827002
+          ],
+          "samples_ts": [
+            41.1662,
+            41.183,
+            41.1731
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 62
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:10:08.086768+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:09:15Z\",\n    \"avg_ns\": 3582695655,\n    \"stddev_ns\": 208851,\n    \"avg_ts\": 142.909153,\n    \"stddev_ts\": 0.007981,\n    \"samples_ns\": [ 3582633809, 3582919367, 3582533790 ],\n    \"samples_ts\": [ 142.912, 142.9, 142.916 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:09:29Z\",\n    \"avg_ns\": 12662614054,\n    \"stddev_ns\": 2243504,\n    \"avg_ts\": 40.433990,\n    \"stddev_ts\": 0.007155,\n    \"samples_ns\": [ 12662966796, 12660217925, 12664657442 ],\n    \"samples_ts\": [ 40.4329, 40.4416, 40.4275 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:09:15Z",
+          "avg_ns": 3582695655,
+          "stddev_ns": 208851,
+          "avg_ts": 142.909153,
+          "stddev_ts": 0.007981,
+          "samples_ns": [
+            3582633809,
+            3582919367,
+            3582533790
+          ],
+          "samples_ts": [
+            142.912,
+            142.9,
+            142.916
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:09:29Z",
+          "avg_ns": 12662614054,
+          "stddev_ns": 2243504,
+          "avg_ts": 40.43399,
+          "stddev_ts": 0.007155,
+          "samples_ns": [
+            12662966796,
+            12660217925,
+            12664657442
+          ],
+          "samples_ts": [
+            40.4329,
+            40.4416,
+            40.4275
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 63
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:10:21.748304+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:10:08Z\",\n    \"avg_ns\": 876510739,\n    \"stddev_ns\": 345111,\n    \"avg_ts\": 146.033593,\n    \"stddev_ts\": 0.057086,\n    \"samples_ns\": [ 876710587, 876115190, 876706442 ],\n    \"samples_ts\": [ 146, 146.1, 146.001 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:10:12Z\",\n    \"avg_ns\": 3126004096,\n    \"stddev_ns\": 10277114,\n    \"avg_ts\": 40.947139,\n    \"stddev_ts\": 0.134857,\n    \"samples_ns\": [ 3133044153, 3130756978, 3114211159 ],\n    \"samples_ts\": [ 40.8548, 40.8847, 41.1019 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:10:08Z",
+          "avg_ns": 876510739,
+          "stddev_ns": 345111,
+          "avg_ts": 146.033593,
+          "stddev_ts": 0.057086,
+          "samples_ns": [
+            876710587,
+            876115190,
+            876706442
+          ],
+          "samples_ts": [
+            146,
+            146.1,
+            146.001
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:10:12Z",
+          "avg_ns": 3126004096,
+          "stddev_ns": 10277114,
+          "avg_ts": 40.947139,
+          "stddev_ts": 0.134857,
+          "samples_ns": [
+            3133044153,
+            3130756978,
+            3114211159
+          ],
+          "samples_ts": [
+            40.8548,
+            40.8847,
+            41.1019
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 64
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:11:04.127690+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:10:22Z\",\n    \"avg_ns\": 872255449,\n    \"stddev_ns\": 180355,\n    \"avg_ts\": 146.746006,\n    \"stddev_ts\": 0.029935,\n    \"samples_ns\": [ 872308623, 872057004, 872400721 ],\n    \"samples_ts\": [ 146.737, 146.779, 146.722 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:10:25Z\",\n    \"avg_ns\": 12705605264,\n    \"stddev_ns\": 10038963,\n    \"avg_ts\": 40.297192,\n    \"stddev_ts\": 0.031845,\n    \"samples_ns\": [ 12694492867, 12708308161, 12714014766 ],\n    \"samples_ts\": [ 40.3325, 40.2886, 40.2705 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:10:22Z",
+          "avg_ns": 872255449,
+          "stddev_ns": 180355,
+          "avg_ts": 146.746006,
+          "stddev_ts": 0.029935,
+          "samples_ns": [
+            872308623,
+            872057004,
+            872400721
+          ],
+          "samples_ts": [
+            146.737,
+            146.779,
+            146.722
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:10:25Z",
+          "avg_ns": 12705605264,
+          "stddev_ns": 10038963,
+          "avg_ts": 40.297192,
+          "stddev_ts": 0.031845,
+          "samples_ns": [
+            12694492867,
+            12708308161,
+            12714014766
+          ],
+          "samples_ts": [
+            40.3325,
+            40.2886,
+            40.2705
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 65
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:11:28.728680+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:11:04Z\",\n    \"avg_ns\": 3622558167,\n    \"stddev_ns\": 83399,\n    \"avg_ts\": 141.336585,\n    \"stddev_ts\": 0.002252,\n    \"samples_ns\": [ 3622546818, 3622620731, 3622506953 ],\n    \"samples_ts\": [ 141.337, 141.334, 141.339 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:11:19Z\",\n    \"avg_ns\": 3109899267,\n    \"stddev_ns\": 2385093,\n    \"avg_ts\": 41.158906,\n    \"stddev_ts\": 0.031544,\n    \"samples_ns\": [ 3108411457, 3108636839, 3112649506 ],\n    \"samples_ts\": [ 41.1786, 41.1756, 41.1225 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:11:04Z",
+          "avg_ns": 3622558167,
+          "stddev_ns": 83399,
+          "avg_ts": 141.336585,
+          "stddev_ts": 0.002252,
+          "samples_ns": [
+            3622546818,
+            3622620731,
+            3622506953
+          ],
+          "samples_ts": [
+            141.337,
+            141.334,
+            141.339
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:11:19Z",
+          "avg_ns": 3109899267,
+          "stddev_ns": 2385093,
+          "avg_ts": 41.158906,
+          "stddev_ts": 0.031544,
+          "samples_ns": [
+            3108411457,
+            3108636839,
+            3112649506
+          ],
+          "samples_ts": [
+            41.1786,
+            41.1756,
+            41.1225
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 66
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:12:21.913621+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:11:29Z\",\n    \"avg_ns\": 3622339954,\n    \"stddev_ns\": 144719,\n    \"avg_ts\": 141.345099,\n    \"stddev_ts\": 0.005136,\n    \"samples_ns\": [ 3622200245, 3622358016, 3622461602 ],\n    \"samples_ts\": [ 141.351, 141.344, 141.34 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:11:43Z\",\n    \"avg_ns\": 12630472157,\n    \"stddev_ns\": 9341214,\n    \"avg_ts\": 40.536900,\n    \"stddev_ts\": 0.029978,\n    \"samples_ns\": [ 12640097010, 12629876357, 12621443104 ],\n    \"samples_ts\": [ 40.506, 40.5388, 40.5659 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:11:29Z",
+          "avg_ns": 3622339954,
+          "stddev_ns": 144719,
+          "avg_ts": 141.345099,
+          "stddev_ts": 0.005136,
+          "samples_ns": [
+            3622200245,
+            3622358016,
+            3622461602
+          ],
+          "samples_ts": [
+            141.351,
+            141.344,
+            141.34
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:11:43Z",
+          "avg_ns": 12630472157,
+          "stddev_ns": 9341214,
+          "avg_ts": 40.5369,
+          "stddev_ts": 0.029978,
+          "samples_ns": [
+            12640097010,
+            12629876357,
+            12621443104
+          ],
+          "samples_ts": [
+            40.506,
+            40.5388,
+            40.5659
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 67
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:12:35.648910+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:12:22Z\",\n    \"avg_ns\": 885435373,\n    \"stddev_ns\": 802871,\n    \"avg_ts\": 144.561731,\n    \"stddev_ts\": 0.130961,\n    \"samples_ns\": [ 885750872, 886031400, 884523849 ],\n    \"samples_ts\": [ 144.51, 144.464, 144.711 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:12:26Z\",\n    \"avg_ns\": 3137697631,\n    \"stddev_ns\": 1091905,\n    \"avg_ts\": 40.794246,\n    \"stddev_ts\": 0.014161,\n    \"samples_ns\": [ 3138536599, 3136466925, 3138089371 ],\n    \"samples_ts\": [ 40.7833, 40.8103, 40.7892 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:12:22Z",
+          "avg_ns": 885435373,
+          "stddev_ns": 802871,
+          "avg_ts": 144.561731,
+          "stddev_ts": 0.130961,
+          "samples_ns": [
+            885750872,
+            886031400,
+            884523849
+          ],
+          "samples_ts": [
+            144.51,
+            144.464,
+            144.711
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:12:26Z",
+          "avg_ns": 3137697631,
+          "stddev_ns": 1091905,
+          "avg_ts": 40.794246,
+          "stddev_ts": 0.014161,
+          "samples_ns": [
+            3138536599,
+            3136466925,
+            3138089371
+          ],
+          "samples_ts": [
+            40.7833,
+            40.8103,
+            40.7892
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 68
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:13:18.400899+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:12:36Z\",\n    \"avg_ns\": 885055514,\n    \"stddev_ns\": 613290,\n    \"avg_ts\": 144.623743,\n    \"stddev_ts\": 0.100101,\n    \"samples_ns\": [ 884433707, 885658405, 885074431 ],\n    \"samples_ts\": [ 144.725, 144.525, 144.621 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:12:39Z\",\n    \"avg_ns\": 12795955941,\n    \"stddev_ns\": 8029115,\n    \"avg_ts\": 40.012652,\n    \"stddev_ts\": 0.025116,\n    \"samples_ns\": [ 12786711823, 12801191525, 12799964475 ],\n    \"samples_ts\": [ 40.0416, 39.9963, 40.0001 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:12:36Z",
+          "avg_ns": 885055514,
+          "stddev_ns": 613290,
+          "avg_ts": 144.623743,
+          "stddev_ts": 0.100101,
+          "samples_ns": [
+            884433707,
+            885658405,
+            885074431
+          ],
+          "samples_ts": [
+            144.725,
+            144.525,
+            144.621
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:12:39Z",
+          "avg_ns": 12795955941,
+          "stddev_ns": 8029115,
+          "avg_ts": 40.012652,
+          "stddev_ts": 0.025116,
+          "samples_ns": [
+            12786711823,
+            12801191525,
+            12799964475
+          ],
+          "samples_ts": [
+            40.0416,
+            39.9963,
+            40.0001
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 69
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:13:43.743917+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:13:19Z\",\n    \"avg_ns\": 3804577426,\n    \"stddev_ns\": 769205,\n    \"avg_ts\": 134.574739,\n    \"stddev_ts\": 0.027123,\n    \"samples_ns\": [ 3803704589, 3805142256, 3804885434 ],\n    \"samples_ts\": [ 134.606, 134.555, 134.564 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:13:34Z\",\n    \"avg_ns\": 3115112278,\n    \"stddev_ns\": 668820,\n    \"avg_ts\": 41.090013,\n    \"stddev_ts\": 0.008791,\n    \"samples_ns\": [ 3114467361, 3115798423, 3115071051 ],\n    \"samples_ts\": [ 41.0985, 41.081, 41.0906 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:13:19Z",
+          "avg_ns": 3804577426,
+          "stddev_ns": 769205,
+          "avg_ts": 134.574739,
+          "stddev_ts": 0.027123,
+          "samples_ns": [
+            3803704589,
+            3805142256,
+            3804885434
+          ],
+          "samples_ts": [
+            134.606,
+            134.555,
+            134.564
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:13:34Z",
+          "avg_ns": 3115112278,
+          "stddev_ns": 668820,
+          "avg_ts": 41.090013,
+          "stddev_ts": 0.008791,
+          "samples_ns": [
+            3114467361,
+            3115798423,
+            3115071051
+          ],
+          "samples_ts": [
+            41.0985,
+            41.081,
+            41.0906
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 70
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:14:37.398025+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:13:44Z\",\n    \"avg_ns\": 3741853247,\n    \"stddev_ns\": 586206,\n    \"avg_ts\": 136.830596,\n    \"stddev_ts\": 0.021202,\n    \"samples_ns\": [ 3742385518, 3741938780, 3741235445 ],\n    \"samples_ts\": [ 136.811, 136.827, 136.853 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:13:59Z\",\n    \"avg_ns\": 12633606479,\n    \"stddev_ns\": 3742838,\n    \"avg_ts\": 40.526831,\n    \"stddev_ts\": 0.011994,\n    \"samples_ns\": [ 12631202965, 12637914829, 12631701645 ],\n    \"samples_ts\": [ 40.5345, 40.513, 40.5329 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:13:44Z",
+          "avg_ns": 3741853247,
+          "stddev_ns": 586206,
+          "avg_ts": 136.830596,
+          "stddev_ts": 0.021202,
+          "samples_ns": [
+            3742385518,
+            3741938780,
+            3741235445
+          ],
+          "samples_ts": [
+            136.811,
+            136.827,
+            136.853
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:13:59Z",
+          "avg_ns": 12633606479,
+          "stddev_ns": 3742838,
+          "avg_ts": 40.526831,
+          "stddev_ts": 0.011994,
+          "samples_ns": [
+            12631202965,
+            12637914829,
+            12631701645
+          ],
+          "samples_ts": [
+            40.5345,
+            40.513,
+            40.5329
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 71
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:14:49.748688+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:14:38Z\",\n    \"avg_ns\": 604979118,\n    \"stddev_ns\": 878791,\n    \"avg_ts\": 211.577848,\n    \"stddev_ts\": 0.307087,\n    \"samples_ns\": [ 605990871, 604540629, 604405854 ],\n    \"samples_ts\": [ 211.224, 211.731, 211.778 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:14:40Z\",\n    \"avg_ns\": 3053405466,\n    \"stddev_ns\": 1737037,\n    \"avg_ts\": 41.920416,\n    \"stddev_ts\": 0.023844,\n    \"samples_ns\": [ 3051402095, 3054471353, 3054342951 ],\n    \"samples_ts\": [ 41.9479, 41.9058, 41.9075 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:14:38Z",
+          "avg_ns": 604979118,
+          "stddev_ns": 878791,
+          "avg_ts": 211.577848,
+          "stddev_ts": 0.307087,
+          "samples_ns": [
+            605990871,
+            604540629,
+            604405854
+          ],
+          "samples_ts": [
+            211.224,
+            211.731,
+            211.778
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:14:40Z",
+          "avg_ns": 3053405466,
+          "stddev_ns": 1737037,
+          "avg_ts": 41.920416,
+          "stddev_ts": 0.023844,
+          "samples_ns": [
+            3051402095,
+            3054471353,
+            3054342951
+          ],
+          "samples_ts": [
+            41.9479,
+            41.9058,
+            41.9075
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 72
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:15:30.562649+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:14:50Z\",\n    \"avg_ns\": 605426653,\n    \"stddev_ns\": 506728,\n    \"avg_ts\": 211.421250,\n    \"stddev_ts\": 0.176908,\n    \"samples_ns\": [ 605314082, 604985676, 605980201 ],\n    \"samples_ts\": [ 211.46, 211.575, 211.228 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:14:52Z\",\n    \"avg_ns\": 12522859582,\n    \"stddev_ns\": 12067091,\n    \"avg_ts\": 40.885256,\n    \"stddev_ts\": 0.039419,\n    \"samples_ns\": [ 12508934760, 12529387012, 12530256974 ],\n    \"samples_ts\": [ 40.9307, 40.8639, 40.8611 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:14:50Z",
+          "avg_ns": 605426653,
+          "stddev_ns": 506728,
+          "avg_ts": 211.42125,
+          "stddev_ts": 0.176908,
+          "samples_ns": [
+            605314082,
+            604985676,
+            605980201
+          ],
+          "samples_ts": [
+            211.46,
+            211.575,
+            211.228
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:14:52Z",
+          "avg_ns": 12522859582,
+          "stddev_ns": 12067091,
+          "avg_ts": 40.885256,
+          "stddev_ts": 0.039419,
+          "samples_ns": [
+            12508934760,
+            12529387012,
+            12530256974
+          ],
+          "samples_ts": [
+            40.9307,
+            40.8639,
+            40.8611
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 73
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:15:50.464061+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:15:31Z\",\n    \"avg_ns\": 2466743898,\n    \"stddev_ns\": 174333,\n    \"avg_ts\": 207.561069,\n    \"stddev_ts\": 0.014061,\n    \"samples_ns\": [ 2466560036, 2466785121, 2466886538 ],\n    \"samples_ts\": [ 207.577, 207.558, 207.549 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:15:41Z\",\n    \"avg_ns\": 3063916790,\n    \"stddev_ns\": 1050866,\n    \"avg_ts\": 41.776595,\n    \"stddev_ts\": 0.014309,\n    \"samples_ns\": [ 3062874511, 3064973184, 3063902676 ],\n    \"samples_ts\": [ 41.7908, 41.7622, 41.7768 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:15:31Z",
+          "avg_ns": 2466743898,
+          "stddev_ns": 174333,
+          "avg_ts": 207.561069,
+          "stddev_ts": 0.014061,
+          "samples_ns": [
+            2466560036,
+            2466785121,
+            2466886538
+          ],
+          "samples_ts": [
+            207.577,
+            207.558,
+            207.549
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:15:41Z",
+          "avg_ns": 3063916790,
+          "stddev_ns": 1050866,
+          "avg_ts": 41.776595,
+          "stddev_ts": 0.014309,
+          "samples_ns": [
+            3062874511,
+            3064973184,
+            3063902676
+          ],
+          "samples_ts": [
+            41.7908,
+            41.7622,
+            41.7768
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 74
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:16:38.376603+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:15:51Z\",\n    \"avg_ns\": 2461830634,\n    \"stddev_ns\": 1647624,\n    \"avg_ts\": 207.975376,\n    \"stddev_ts\": 0.139180,\n    \"samples_ns\": [ 2462652667, 2462904677, 2459934559 ],\n    \"samples_ts\": [ 207.906, 207.885, 208.136 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:16:00Z\",\n    \"avg_ns\": 12427884086,\n    \"stddev_ns\": 1725467,\n    \"avg_ts\": 41.197681,\n    \"stddev_ts\": 0.005720,\n    \"samples_ns\": [ 12425921764, 12429163921, 12428566573 ],\n    \"samples_ts\": [ 41.2042, 41.1934, 41.1954 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:15:51Z",
+          "avg_ns": 2461830634,
+          "stddev_ns": 1647624,
+          "avg_ts": 207.975376,
+          "stddev_ts": 0.13918,
+          "samples_ns": [
+            2462652667,
+            2462904677,
+            2459934559
+          ],
+          "samples_ts": [
+            207.906,
+            207.885,
+            208.136
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:16:00Z",
+          "avg_ns": 12427884086,
+          "stddev_ns": 1725467,
+          "avg_ts": 41.197681,
+          "stddev_ts": 0.00572,
+          "samples_ns": [
+            12425921764,
+            12429163921,
+            12428566573
+          ],
+          "samples_ts": [
+            41.2042,
+            41.1934,
+            41.1954
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 75
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:16:50.772429+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:16:39Z\",\n    \"avg_ns\": 604888728,\n    \"stddev_ns\": 253282,\n    \"avg_ts\": 211.609192,\n    \"stddev_ts\": 0.088626,\n    \"samples_ns\": [ 605067319, 604598856, 605000009 ],\n    \"samples_ts\": [ 211.547, 211.711, 211.57 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:16:41Z\",\n    \"avg_ns\": 3059335160,\n    \"stddev_ns\": 895217,\n    \"avg_ts\": 41.839158,\n    \"stddev_ts\": 0.012241,\n    \"samples_ns\": [ 3058982670, 3058669842, 3060352968 ],\n    \"samples_ts\": [ 41.844, 41.8483, 41.8252 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:16:39Z",
+          "avg_ns": 604888728,
+          "stddev_ns": 253282,
+          "avg_ts": 211.609192,
+          "stddev_ts": 0.088626,
+          "samples_ns": [
+            605067319,
+            604598856,
+            605000009
+          ],
+          "samples_ts": [
+            211.547,
+            211.711,
+            211.57
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:16:41Z",
+          "avg_ns": 3059335160,
+          "stddev_ns": 895217,
+          "avg_ts": 41.839158,
+          "stddev_ts": 0.012241,
+          "samples_ns": [
+            3058982670,
+            3058669842,
+            3060352968
+          ],
+          "samples_ts": [
+            41.844,
+            41.8483,
+            41.8252
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 76
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:17:31.508711+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:16:51Z\",\n    \"avg_ns\": 604797851,\n    \"stddev_ns\": 189839,\n    \"avg_ts\": 211.640977,\n    \"stddev_ts\": 0.065298,\n    \"samples_ns\": [ 604731812, 605008519, 604653224 ],\n    \"samples_ts\": [ 211.664, 211.567, 211.692 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:16:53Z\",\n    \"avg_ns\": 12497122656,\n    \"stddev_ns\": 12062261,\n    \"avg_ts\": 40.969456,\n    \"stddev_ts\": 0.039547,\n    \"samples_ns\": [ 12508672071, 12484606738, 12498089160 ],\n    \"samples_ts\": [ 40.9316, 41.0105, 40.9663 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:16:51Z",
+          "avg_ns": 604797851,
+          "stddev_ns": 189839,
+          "avg_ts": 211.640977,
+          "stddev_ts": 0.065298,
+          "samples_ns": [
+            604731812,
+            605008519,
+            604653224
+          ],
+          "samples_ts": [
+            211.664,
+            211.567,
+            211.692
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:16:53Z",
+          "avg_ns": 12497122656,
+          "stddev_ns": 12062261,
+          "avg_ts": 40.969456,
+          "stddev_ts": 0.039547,
+          "samples_ns": [
+            12508672071,
+            12484606738,
+            12498089160
+          ],
+          "samples_ts": [
+            40.9316,
+            41.0105,
+            40.9663
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 77
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:17:51.424088+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:17:32Z\",\n    \"avg_ns\": 2497965508,\n    \"stddev_ns\": 804971,\n    \"avg_ts\": 204.966815,\n    \"stddev_ts\": 0.065795,\n    \"samples_ns\": [ 2498778641, 2497942472, 2497175413 ],\n    \"samples_ts\": [ 204.9, 204.969, 205.032 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:17:42Z\",\n    \"avg_ns\": 3050256925,\n    \"stddev_ns\": 1983335,\n    \"avg_ts\": 41.963690,\n    \"stddev_ts\": 0.027290,\n    \"samples_ns\": [ 3050631995, 3048112834, 3052025946 ],\n    \"samples_ts\": [ 41.9585, 41.9932, 41.9394 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:17:32Z",
+          "avg_ns": 2497965508,
+          "stddev_ns": 804971,
+          "avg_ts": 204.966815,
+          "stddev_ts": 0.065795,
+          "samples_ns": [
+            2498778641,
+            2497942472,
+            2497175413
+          ],
+          "samples_ts": [
+            204.9,
+            204.969,
+            205.032
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:17:42Z",
+          "avg_ns": 3050256925,
+          "stddev_ns": 1983335,
+          "avg_ts": 41.96369,
+          "stddev_ts": 0.02729,
+          "samples_ns": [
+            3050631995,
+            3048112834,
+            3052025946
+          ],
+          "samples_ts": [
+            41.9585,
+            41.9932,
+            41.9394
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 78
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:18:39.621832+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:17:52Z\",\n    \"avg_ns\": 2527682721,\n    \"stddev_ns\": 865987,\n    \"avg_ts\": 202.557084,\n    \"stddev_ts\": 0.069407,\n    \"samples_ns\": [ 2528378798, 2527956408, 2526712957 ],\n    \"samples_ts\": [ 202.501, 202.535, 202.635 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:18:02Z\",\n    \"avg_ns\": 12434215959,\n    \"stddev_ns\": 9200896,\n    \"avg_ts\": 41.176717,\n    \"stddev_ts\": 0.030475,\n    \"samples_ns\": [ 12423892402, 12441545629, 12437209848 ],\n    \"samples_ts\": [ 41.2109, 41.1524, 41.1668 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:17:52Z",
+          "avg_ns": 2527682721,
+          "stddev_ns": 865987,
+          "avg_ts": 202.557084,
+          "stddev_ts": 0.069407,
+          "samples_ns": [
+            2528378798,
+            2527956408,
+            2526712957
+          ],
+          "samples_ts": [
+            202.501,
+            202.535,
+            202.635
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:18:02Z",
+          "avg_ns": 12434215959,
+          "stddev_ns": 9200896,
+          "avg_ts": 41.176717,
+          "stddev_ts": 0.030475,
+          "samples_ns": [
+            12423892402,
+            12441545629,
+            12437209848
+          ],
+          "samples_ts": [
+            41.2109,
+            41.1524,
+            41.1668
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 79
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:18:52.048323+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:18:40Z\",\n    \"avg_ns\": 605447705,\n    \"stddev_ns\": 168232,\n    \"avg_ts\": 211.413811,\n    \"stddev_ts\": 0.058119,\n    \"samples_ns\": [ 605585155, 605262673, 605495288 ],\n    \"samples_ts\": [ 211.366, 211.478, 211.397 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:18:42Z\",\n    \"avg_ns\": 3069277971,\n    \"stddev_ns\": 1286599,\n    \"avg_ts\": 41.703624,\n    \"stddev_ts\": 0.017481,\n    \"samples_ns\": [ 3069184180, 3068040834, 3070608899 ],\n    \"samples_ts\": [ 41.7049, 41.7204, 41.6855 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:18:40Z",
+          "avg_ns": 605447705,
+          "stddev_ns": 168232,
+          "avg_ts": 211.413811,
+          "stddev_ts": 0.058119,
+          "samples_ns": [
+            605585155,
+            605262673,
+            605495288
+          ],
+          "samples_ts": [
+            211.366,
+            211.478,
+            211.397
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:18:42Z",
+          "avg_ns": 3069277971,
+          "stddev_ns": 1286599,
+          "avg_ts": 41.703624,
+          "stddev_ts": 0.017481,
+          "samples_ns": [
+            3069184180,
+            3068040834,
+            3070608899
+          ],
+          "samples_ts": [
+            41.7049,
+            41.7204,
+            41.6855
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 80
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:19:32.713715+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:18:52Z\",\n    \"avg_ns\": 603832063,\n    \"stddev_ns\": 286544,\n    \"avg_ts\": 211.979501,\n    \"stddev_ts\": 0.100620,\n    \"samples_ns\": [ 603503109, 603965724, 604027356 ],\n    \"samples_ts\": [ 212.095, 211.933, 211.911 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:18:55Z\",\n    \"avg_ns\": 12489044123,\n    \"stddev_ns\": 7020694,\n    \"avg_ts\": 40.995940,\n    \"stddev_ts\": 0.023033,\n    \"samples_ns\": [ 12485286950, 12497141867, 12484703554 ],\n    \"samples_ts\": [ 41.0083, 40.9694, 41.0102 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:18:52Z",
+          "avg_ns": 603832063,
+          "stddev_ns": 286544,
+          "avg_ts": 211.979501,
+          "stddev_ts": 0.10062,
+          "samples_ns": [
+            603503109,
+            603965724,
+            604027356
+          ],
+          "samples_ts": [
+            212.095,
+            211.933,
+            211.911
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:18:55Z",
+          "avg_ns": 12489044123,
+          "stddev_ns": 7020694,
+          "avg_ts": 40.99594,
+          "stddev_ts": 0.023033,
+          "samples_ns": [
+            12485286950,
+            12497141867,
+            12484703554
+          ],
+          "samples_ts": [
+            41.0083,
+            40.9694,
+            41.0102
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 81
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:19:53.233600+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:19:33Z\",\n    \"avg_ns\": 2620157780,\n    \"stddev_ns\": 2283346,\n    \"avg_ts\": 195.408178,\n    \"stddev_ts\": 0.170225,\n    \"samples_ns\": [ 2617774253, 2620376134, 2622322955 ],\n    \"samples_ts\": [ 195.586, 195.392, 195.247 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:19:43Z\",\n    \"avg_ns\": 3071141507,\n    \"stddev_ns\": 2251017,\n    \"avg_ts\": 41.678329,\n    \"stddev_ts\": 0.030558,\n    \"samples_ns\": [ 3071867955, 3072939598, 3068616968 ],\n    \"samples_ts\": [ 41.6685, 41.6539, 41.7126 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:19:33Z",
+          "avg_ns": 2620157780,
+          "stddev_ns": 2283346,
+          "avg_ts": 195.408178,
+          "stddev_ts": 0.170225,
+          "samples_ns": [
+            2617774253,
+            2620376134,
+            2622322955
+          ],
+          "samples_ts": [
+            195.586,
+            195.392,
+            195.247
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:19:43Z",
+          "avg_ns": 3071141507,
+          "stddev_ns": 2251017,
+          "avg_ts": 41.678329,
+          "stddev_ts": 0.030558,
+          "samples_ns": [
+            3071867955,
+            3072939598,
+            3068616968
+          ],
+          "samples_ts": [
+            41.6685,
+            41.6539,
+            41.7126
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 82
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:20:42.034171+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:19:53Z\",\n    \"avg_ns\": 2671329519,\n    \"stddev_ns\": 806857,\n    \"avg_ts\": 191.664872,\n    \"stddev_ts\": 0.057782,\n    \"samples_ns\": [ 2670400055, 2671814744, 2671773759 ],\n    \"samples_ts\": [ 191.732, 191.63, 191.633 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:20:04Z\",\n    \"avg_ns\": 12444329516,\n    \"stddev_ns\": 7041229,\n    \"avg_ts\": 41.143246,\n    \"stddev_ts\": 0.023276,\n    \"samples_ns\": [ 12451916118, 12443068311, 12438004119 ],\n    \"samples_ts\": [ 41.1182, 41.1474, 41.1642 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:19:53Z",
+          "avg_ns": 2671329519,
+          "stddev_ns": 806857,
+          "avg_ts": 191.664872,
+          "stddev_ts": 0.057782,
+          "samples_ns": [
+            2670400055,
+            2671814744,
+            2671773759
+          ],
+          "samples_ts": [
+            191.732,
+            191.63,
+            191.633
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:20:04Z",
+          "avg_ns": 12444329516,
+          "stddev_ns": 7041229,
+          "avg_ts": 41.143246,
+          "stddev_ts": 0.023276,
+          "samples_ns": [
+            12451916118,
+            12443068311,
+            12438004119
+          ],
+          "samples_ts": [
+            41.1182,
+            41.1474,
+            41.1642
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 83
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:20:54.501854+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:20:42Z\",\n    \"avg_ns\": 606345730,\n    \"stddev_ns\": 1762075,\n    \"avg_ts\": 211.101873,\n    \"stddev_ts\": 0.612700,\n    \"samples_ns\": [ 604863022, 605880901, 608293269 ],\n    \"samples_ts\": [ 211.618, 211.263, 210.425 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:20:45Z\",\n    \"avg_ns\": 3063936945,\n    \"stddev_ns\": 3651070,\n    \"avg_ts\": 41.776356,\n    \"stddev_ts\": 0.049736,\n    \"samples_ns\": [ 3061994623, 3068147663, 3061668551 ],\n    \"samples_ts\": [ 41.8028, 41.719, 41.8073 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:20:42Z",
+          "avg_ns": 606345730,
+          "stddev_ns": 1762075,
+          "avg_ts": 211.101873,
+          "stddev_ts": 0.6127,
+          "samples_ns": [
+            604863022,
+            605880901,
+            608293269
+          ],
+          "samples_ts": [
+            211.618,
+            211.263,
+            210.425
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:20:45Z",
+          "avg_ns": 3063936945,
+          "stddev_ns": 3651070,
+          "avg_ts": 41.776356,
+          "stddev_ts": 0.049736,
+          "samples_ns": [
+            3061994623,
+            3068147663,
+            3061668551
+          ],
+          "samples_ts": [
+            41.8028,
+            41.719,
+            41.8073
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 84
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:21:35.090230+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:20:55Z\",\n    \"avg_ns\": 604914814,\n    \"stddev_ns\": 284992,\n    \"avg_ts\": 211.600073,\n    \"stddev_ts\": 0.098971,\n    \"samples_ns\": [ 605047518, 605106926, 604590000 ],\n    \"samples_ts\": [ 211.554, 211.533, 211.714 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:20:57Z\",\n    \"avg_ns\": 12463677755,\n    \"stddev_ns\": 2385281,\n    \"avg_ts\": 41.079369,\n    \"stddev_ts\": 0.007844,\n    \"samples_ns\": [ 12466421401, 12462443593, 12462168273 ],\n    \"samples_ts\": [ 41.0703, 41.0834, 41.0843 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:20:55Z",
+          "avg_ns": 604914814,
+          "stddev_ns": 284992,
+          "avg_ts": 211.600073,
+          "stddev_ts": 0.098971,
+          "samples_ns": [
+            605047518,
+            605106926,
+            604590000
+          ],
+          "samples_ts": [
+            211.554,
+            211.533,
+            211.714
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:20:57Z",
+          "avg_ns": 12463677755,
+          "stddev_ns": 2385281,
+          "avg_ts": 41.079369,
+          "stddev_ts": 0.007844,
+          "samples_ns": [
+            12466421401,
+            12462443593,
+            12462168273
+          ],
+          "samples_ts": [
+            41.0703,
+            41.0834,
+            41.0843
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 85
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:21:54.907579+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:21:35Z\",\n    \"avg_ns\": 2459432071,\n    \"stddev_ns\": 265340,\n    \"avg_ts\": 208.178144,\n    \"stddev_ts\": 0.021660,\n    \"samples_ns\": [ 2459724810, 2459250848, 2459320557 ],\n    \"samples_ts\": [ 208.153, 208.193, 208.188 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:21:45Z\",\n    \"avg_ns\": 3064927655,\n    \"stddev_ns\": 1850385,\n    \"avg_ts\": 41.762823,\n    \"stddev_ts\": 0.025210,\n    \"samples_ns\": [ 3062823416, 3066295991, 3065663559 ],\n    \"samples_ts\": [ 41.7915, 41.7442, 41.7528 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:21:35Z",
+          "avg_ns": 2459432071,
+          "stddev_ns": 265340,
+          "avg_ts": 208.178144,
+          "stddev_ts": 0.02166,
+          "samples_ns": [
+            2459724810,
+            2459250848,
+            2459320557
+          ],
+          "samples_ts": [
+            208.153,
+            208.193,
+            208.188
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:21:45Z",
+          "avg_ns": 3064927655,
+          "stddev_ns": 1850385,
+          "avg_ts": 41.762823,
+          "stddev_ts": 0.02521,
+          "samples_ns": [
+            3062823416,
+            3066295991,
+            3065663559
+          ],
+          "samples_ts": [
+            41.7915,
+            41.7442,
+            41.7528
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 86
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:22:42.926788+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:21:55Z\",\n    \"avg_ns\": 2468732848,\n    \"stddev_ns\": 627355,\n    \"avg_ts\": 207.393855,\n    \"stddev_ts\": 0.052364,\n    \"samples_ns\": [ 2468406103, 2468340735, 2469451708 ],\n    \"samples_ts\": [ 207.421, 207.427, 207.333 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:22:05Z\",\n    \"avg_ns\": 12454547854,\n    \"stddev_ns\": 4477551,\n    \"avg_ts\": 41.109485,\n    \"stddev_ts\": 0.014777,\n    \"samples_ns\": [ 12452772549, 12459640823, 12451230190 ],\n    \"samples_ts\": [ 41.1153, 41.0927, 41.1204 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:21:55Z",
+          "avg_ns": 2468732848,
+          "stddev_ns": 627355,
+          "avg_ts": 207.393855,
+          "stddev_ts": 0.052364,
+          "samples_ns": [
+            2468406103,
+            2468340735,
+            2469451708
+          ],
+          "samples_ts": [
+            207.421,
+            207.427,
+            207.333
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:22:05Z",
+          "avg_ns": 12454547854,
+          "stddev_ns": 4477551,
+          "avg_ts": 41.109485,
+          "stddev_ts": 0.014777,
+          "samples_ns": [
+            12452772549,
+            12459640823,
+            12451230190
+          ],
+          "samples_ts": [
+            41.1153,
+            41.0927,
+            41.1204
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 87
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:22:55.307421+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:22:43Z\",\n    \"avg_ns\": 605140850,\n    \"stddev_ns\": 188497,\n    \"avg_ts\": 211.521017,\n    \"stddev_ts\": 0.065883,\n    \"samples_ns\": [ 605340486, 605116134, 604965930 ],\n    \"samples_ts\": [ 211.451, 211.53, 211.582 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:22:46Z\",\n    \"avg_ns\": 3052521802,\n    \"stddev_ns\": 2957533,\n    \"avg_ts\": 41.932569,\n    \"stddev_ts\": 0.040628,\n    \"samples_ns\": [ 3049394072, 3055271808, 3052899527 ],\n    \"samples_ts\": [ 41.9756, 41.8948, 41.9274 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:22:43Z",
+          "avg_ns": 605140850,
+          "stddev_ns": 188497,
+          "avg_ts": 211.521017,
+          "stddev_ts": 0.065883,
+          "samples_ns": [
+            605340486,
+            605116134,
+            604965930
+          ],
+          "samples_ts": [
+            211.451,
+            211.53,
+            211.582
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:22:46Z",
+          "avg_ns": 3052521802,
+          "stddev_ns": 2957533,
+          "avg_ts": 41.932569,
+          "stddev_ts": 0.040628,
+          "samples_ns": [
+            3049394072,
+            3055271808,
+            3052899527
+          ],
+          "samples_ts": [
+            41.9756,
+            41.8948,
+            41.9274
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 88
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:23:35.982771+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:22:55Z\",\n    \"avg_ns\": 605921966,\n    \"stddev_ns\": 333598,\n    \"avg_ts\": 211.248367,\n    \"stddev_ts\": 0.115982,\n    \"samples_ns\": [ 606266461, 605602495, 605896943 ],\n    \"samples_ts\": [ 211.128, 211.36, 211.257 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:22:58Z\",\n    \"avg_ns\": 12483956811,\n    \"stddev_ns\": 4566686,\n    \"avg_ts\": 41.012642,\n    \"stddev_ts\": 0.014996,\n    \"samples_ns\": [ 12487825230, 12478923221, 12485121984 ],\n    \"samples_ts\": [ 40.9999, 41.0292, 41.0088 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:22:55Z",
+          "avg_ns": 605921966,
+          "stddev_ns": 333598,
+          "avg_ts": 211.248367,
+          "stddev_ts": 0.115982,
+          "samples_ns": [
+            606266461,
+            605602495,
+            605896943
+          ],
+          "samples_ts": [
+            211.128,
+            211.36,
+            211.257
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:22:58Z",
+          "avg_ns": 12483956811,
+          "stddev_ns": 4566686,
+          "avg_ts": 41.012642,
+          "stddev_ts": 0.014996,
+          "samples_ns": [
+            12487825230,
+            12478923221,
+            12485121984
+          ],
+          "samples_ts": [
+            40.9999,
+            41.0292,
+            41.0088
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 89
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:23:55.875034+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:23:36Z\",\n    \"avg_ns\": 2487597648,\n    \"stddev_ns\": 791717,\n    \"avg_ts\": 205.821080,\n    \"stddev_ts\": 0.065502,\n    \"samples_ns\": [ 2488438284, 2487488480, 2486866180 ],\n    \"samples_ts\": [ 205.752, 205.83, 205.882 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:23:46Z\",\n    \"avg_ns\": 3053552136,\n    \"stddev_ns\": 2331472,\n    \"avg_ts\": 41.918410,\n    \"stddev_ts\": 0.031996,\n    \"samples_ns\": [ 3051250513, 3055911091, 3053494805 ],\n    \"samples_ts\": [ 41.95, 41.886, 41.9192 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:23:36Z",
+          "avg_ns": 2487597648,
+          "stddev_ns": 791717,
+          "avg_ts": 205.82108,
+          "stddev_ts": 0.065502,
+          "samples_ns": [
+            2488438284,
+            2487488480,
+            2486866180
+          ],
+          "samples_ts": [
+            205.752,
+            205.83,
+            205.882
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:23:46Z",
+          "avg_ns": 3053552136,
+          "stddev_ns": 2331472,
+          "avg_ts": 41.91841,
+          "stddev_ts": 0.031996,
+          "samples_ns": [
+            3051250513,
+            3055911091,
+            3053494805
+          ],
+          "samples_ts": [
+            41.95,
+            41.886,
+            41.9192
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 90
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:24:44.049778+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:23:56Z\",\n    \"avg_ns\": 2526881910,\n    \"stddev_ns\": 833221,\n    \"avg_ts\": 202.621276,\n    \"stddev_ts\": 0.066562,\n    \"samples_ns\": [ 2527795084, 2526172757, 2526677891 ],\n    \"samples_ts\": [ 202.548, 202.678, 202.638 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:24:06Z\",\n    \"avg_ns\": 12430998670,\n    \"stddev_ns\": 2234043,\n    \"avg_ts\": 41.187359,\n    \"stddev_ts\": 0.007383,\n    \"samples_ns\": [ 12429634594, 12429791117, 12433570301 ],\n    \"samples_ts\": [ 41.1919, 41.1914, 41.1788 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:23:56Z",
+          "avg_ns": 2526881910,
+          "stddev_ns": 833221,
+          "avg_ts": 202.621276,
+          "stddev_ts": 0.066562,
+          "samples_ns": [
+            2527795084,
+            2526172757,
+            2526677891
+          ],
+          "samples_ts": [
+            202.548,
+            202.678,
+            202.638
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:24:06Z",
+          "avg_ns": 12430998670,
+          "stddev_ns": 2234043,
+          "avg_ts": 41.187359,
+          "stddev_ts": 0.007383,
+          "samples_ns": [
+            12429634594,
+            12429791117,
+            12433570301
+          ],
+          "samples_ts": [
+            41.1919,
+            41.1914,
+            41.1788
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 91
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:24:56.465620+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:24:44Z\",\n    \"avg_ns\": 606170438,\n    \"stddev_ns\": 4336557,\n    \"avg_ts\": 211.168912,\n    \"stddev_ts\": 1.505313,\n    \"samples_ns\": [ 604472253, 611098948, 602940115 ],\n    \"samples_ts\": [ 211.755, 209.459, 212.293 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:24:47Z\",\n    \"avg_ns\": 3070360324,\n    \"stddev_ns\": 2696786,\n    \"avg_ts\": 41.688939,\n    \"stddev_ts\": 0.036609,\n    \"samples_ns\": [ 3067461731, 3072792033, 3070827210 ],\n    \"samples_ts\": [ 41.7283, 41.6559, 41.6826 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:24:44Z",
+          "avg_ns": 606170438,
+          "stddev_ns": 4336557,
+          "avg_ts": 211.168912,
+          "stddev_ts": 1.505313,
+          "samples_ns": [
+            604472253,
+            611098948,
+            602940115
+          ],
+          "samples_ts": [
+            211.755,
+            209.459,
+            212.293
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:24:47Z",
+          "avg_ns": 3070360324,
+          "stddev_ns": 2696786,
+          "avg_ts": 41.688939,
+          "stddev_ts": 0.036609,
+          "samples_ns": [
+            3067461731,
+            3072792033,
+            3070827210
+          ],
+          "samples_ts": [
+            41.7283,
+            41.6559,
+            41.6826
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 92
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:25:37.003549+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:24:57Z\",\n    \"avg_ns\": 607358633,\n    \"stddev_ns\": 667669,\n    \"avg_ts\": 210.748800,\n    \"stddev_ts\": 0.231550,\n    \"samples_ns\": [ 608116872, 607100291, 606858736 ],\n    \"samples_ts\": [ 210.486, 210.838, 210.922 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:24:59Z\",\n    \"avg_ns\": 12435312691,\n    \"stddev_ns\": 9498085,\n    \"avg_ts\": 41.173086,\n    \"stddev_ts\": 0.031439,\n    \"samples_ns\": [ 12432640777, 12427436738, 12445860558 ],\n    \"samples_ts\": [ 41.1819, 41.1992, 41.1382 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:24:57Z",
+          "avg_ns": 607358633,
+          "stddev_ns": 667669,
+          "avg_ts": 210.7488,
+          "stddev_ts": 0.23155,
+          "samples_ns": [
+            608116872,
+            607100291,
+            606858736
+          ],
+          "samples_ts": [
+            210.486,
+            210.838,
+            210.922
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:24:59Z",
+          "avg_ns": 12435312691,
+          "stddev_ns": 9498085,
+          "avg_ts": 41.173086,
+          "stddev_ts": 0.031439,
+          "samples_ns": [
+            12432640777,
+            12427436738,
+            12445860558
+          ],
+          "samples_ts": [
+            41.1819,
+            41.1992,
+            41.1382
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 93
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:25:57.448288+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:25:37Z\",\n    \"avg_ns\": 2624255371,\n    \"stddev_ns\": 820531,\n    \"avg_ts\": 195.102976,\n    \"stddev_ts\": 0.061004,\n    \"samples_ns\": [ 2625069287, 2624268445, 2623428381 ],\n    \"samples_ts\": [ 195.042, 195.102, 195.164 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:25:48Z\",\n    \"avg_ns\": 3055043471,\n    \"stddev_ns\": 515058,\n    \"avg_ts\": 41.897932,\n    \"stddev_ts\": 0.006982,\n    \"samples_ns\": [ 3054943882, 3055595000, 3054591533 ],\n    \"samples_ts\": [ 41.8993, 41.8904, 41.9041 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:25:37Z",
+          "avg_ns": 2624255371,
+          "stddev_ns": 820531,
+          "avg_ts": 195.102976,
+          "stddev_ts": 0.061004,
+          "samples_ns": [
+            2625069287,
+            2624268445,
+            2623428381
+          ],
+          "samples_ts": [
+            195.042,
+            195.102,
+            195.164
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:25:48Z",
+          "avg_ns": 3055043471,
+          "stddev_ns": 515058,
+          "avg_ts": 41.897932,
+          "stddev_ts": 0.006982,
+          "samples_ns": [
+            3054943882,
+            3055595000,
+            3054591533
+          ],
+          "samples_ts": [
+            41.8993,
+            41.8904,
+            41.9041
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 94
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:26:46.194631+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:25:58Z\",\n    \"avg_ns\": 2665480577,\n    \"stddev_ns\": 1202149,\n    \"avg_ts\": 192.085462,\n    \"stddev_ts\": 0.086609,\n    \"samples_ns\": [ 2664824526, 2666868018, 2664749187 ],\n    \"samples_ts\": [ 192.133, 191.986, 192.138 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:26:08Z\",\n    \"avg_ns\": 12431813802,\n    \"stddev_ns\": 2483945,\n    \"avg_ts\": 41.184659,\n    \"stddev_ts\": 0.008228,\n    \"samples_ns\": [ 12434680420, 12430463338, 12430297648 ],\n    \"samples_ts\": [ 41.1752, 41.1891, 41.1897 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:25:58Z",
+          "avg_ns": 2665480577,
+          "stddev_ns": 1202149,
+          "avg_ts": 192.085462,
+          "stddev_ts": 0.086609,
+          "samples_ns": [
+            2664824526,
+            2666868018,
+            2664749187
+          ],
+          "samples_ts": [
+            192.133,
+            191.986,
+            192.138
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:26:08Z",
+          "avg_ns": 12431813802,
+          "stddev_ns": 2483945,
+          "avg_ts": 41.184659,
+          "stddev_ts": 0.008228,
+          "samples_ns": [
+            12434680420,
+            12430463338,
+            12430297648
+          ],
+          "samples_ts": [
+            41.1752,
+            41.1891,
+            41.1897
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 95
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:26:58.586514+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:26:46Z\",\n    \"avg_ns\": 604047732,\n    \"stddev_ns\": 760262,\n    \"avg_ts\": 211.904007,\n    \"stddev_ts\": 0.266416,\n    \"samples_ns\": [ 603448289, 603792570, 604902338 ],\n    \"samples_ts\": [ 212.114, 211.993, 211.604 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:26:49Z\",\n    \"avg_ns\": 3064181987,\n    \"stddev_ns\": 1595179,\n    \"avg_ts\": 41.772983,\n    \"stddev_ts\": 0.021733,\n    \"samples_ns\": [ 3064164968, 3062596346, 3065784648 ],\n    \"samples_ts\": [ 41.7732, 41.7946, 41.7511 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:26:46Z",
+          "avg_ns": 604047732,
+          "stddev_ns": 760262,
+          "avg_ts": 211.904007,
+          "stddev_ts": 0.266416,
+          "samples_ns": [
+            603448289,
+            603792570,
+            604902338
+          ],
+          "samples_ts": [
+            212.114,
+            211.993,
+            211.604
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:26:49Z",
+          "avg_ns": 3064181987,
+          "stddev_ns": 1595179,
+          "avg_ts": 41.772983,
+          "stddev_ts": 0.021733,
+          "samples_ns": [
+            3064164968,
+            3062596346,
+            3065784648
+          ],
+          "samples_ts": [
+            41.7732,
+            41.7946,
+            41.7511
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 96
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:27:39.100458+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:26:59Z\",\n    \"avg_ns\": 605413819,\n    \"stddev_ns\": 284857,\n    \"avg_ts\": 211.425664,\n    \"stddev_ts\": 0.098744,\n    \"samples_ns\": [ 605673432, 605112595, 605455432 ],\n    \"samples_ts\": [ 211.335, 211.531, 211.411 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:27:01Z\",\n    \"avg_ns\": 12435093870,\n    \"stddev_ns\": 2769534,\n    \"avg_ts\": 41.173796,\n    \"stddev_ts\": 0.009162,\n    \"samples_ns\": [ 12433690145, 12438281704, 12433309762 ],\n    \"samples_ts\": [ 41.1784, 41.1632, 41.1797 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:26:59Z",
+          "avg_ns": 605413819,
+          "stddev_ns": 284857,
+          "avg_ts": 211.425664,
+          "stddev_ts": 0.098744,
+          "samples_ns": [
+            605673432,
+            605112595,
+            605455432
+          ],
+          "samples_ts": [
+            211.335,
+            211.531,
+            211.411
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:27:01Z",
+          "avg_ns": 12435093870,
+          "stddev_ns": 2769534,
+          "avg_ts": 41.173796,
+          "stddev_ts": 0.009162,
+          "samples_ns": [
+            12433690145,
+            12438281704,
+            12433309762
+          ],
+          "samples_ts": [
+            41.1784,
+            41.1632,
+            41.1797
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 97
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:27:58.945879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:27:39Z\",\n    \"avg_ns\": 2462925863,\n    \"stddev_ns\": 1031724,\n    \"avg_ts\": 207.882855,\n    \"stddev_ts\": 0.087065,\n    \"samples_ns\": [ 2462552645, 2464092261, 2462132683 ],\n    \"samples_ts\": [ 207.914, 207.784, 207.95 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:27:49Z\",\n    \"avg_ns\": 3054561311,\n    \"stddev_ns\": 2390522,\n    \"avg_ts\": 41.904561,\n    \"stddev_ts\": 0.032777,\n    \"samples_ns\": [ 3057201425, 3052545630, 3053936879 ],\n    \"samples_ts\": [ 41.8684, 41.9322, 41.9131 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:27:39Z",
+          "avg_ns": 2462925863,
+          "stddev_ns": 1031724,
+          "avg_ts": 207.882855,
+          "stddev_ts": 0.087065,
+          "samples_ns": [
+            2462552645,
+            2464092261,
+            2462132683
+          ],
+          "samples_ts": [
+            207.914,
+            207.784,
+            207.95
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:27:49Z",
+          "avg_ns": 3054561311,
+          "stddev_ns": 2390522,
+          "avg_ts": 41.904561,
+          "stddev_ts": 0.032777,
+          "samples_ns": [
+            3057201425,
+            3052545630,
+            3053936879
+          ],
+          "samples_ts": [
+            41.8684,
+            41.9322,
+            41.9131
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 98
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:28:47.057852+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:27:59Z\",\n    \"avg_ns\": 2464081121,\n    \"stddev_ns\": 741571,\n    \"avg_ts\": 207.785380,\n    \"stddev_ts\": 0.062523,\n    \"samples_ns\": [ 2464933752, 2463723310, 2463586301 ],\n    \"samples_ts\": [ 207.713, 207.816, 207.827 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:28:09Z\",\n    \"avg_ns\": 12489619715,\n    \"stddev_ns\": 9327394,\n    \"avg_ts\": 40.994058,\n    \"stddev_ts\": 0.030602,\n    \"samples_ns\": [ 12481812515, 12487099841, 12499946791 ],\n    \"samples_ts\": [ 41.0197, 41.0023, 40.9602 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:27:59Z",
+          "avg_ns": 2464081121,
+          "stddev_ns": 741571,
+          "avg_ts": 207.78538,
+          "stddev_ts": 0.062523,
+          "samples_ns": [
+            2464933752,
+            2463723310,
+            2463586301
+          ],
+          "samples_ts": [
+            207.713,
+            207.816,
+            207.827
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:28:09Z",
+          "avg_ns": 12489619715,
+          "stddev_ns": 9327394,
+          "avg_ts": 40.994058,
+          "stddev_ts": 0.030602,
+          "samples_ns": [
+            12481812515,
+            12487099841,
+            12499946791
+          ],
+          "samples_ts": [
+            41.0197,
+            41.0023,
+            40.9602
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 99
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:28:59.467055+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:28:47Z\",\n    \"avg_ns\": 605159035,\n    \"stddev_ns\": 225689,\n    \"avg_ts\": 211.514667,\n    \"stddev_ts\": 0.078408,\n    \"samples_ns\": [ 605131213, 604949899, 605395994 ],\n    \"samples_ts\": [ 211.524, 211.588, 211.432 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:28:50Z\",\n    \"avg_ns\": 3058915154,\n    \"stddev_ns\": 2221862,\n    \"avg_ts\": 41.844915,\n    \"stddev_ts\": 0.030368,\n    \"samples_ns\": [ 3056995502, 3058402991, 3061346971 ],\n    \"samples_ts\": [ 41.8712, 41.8519, 41.8117 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:28:47Z",
+          "avg_ns": 605159035,
+          "stddev_ns": 225689,
+          "avg_ts": 211.514667,
+          "stddev_ts": 0.078408,
+          "samples_ns": [
+            605131213,
+            604949899,
+            605395994
+          ],
+          "samples_ts": [
+            211.524,
+            211.588,
+            211.432
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:28:50Z",
+          "avg_ns": 3058915154,
+          "stddev_ns": 2221862,
+          "avg_ts": 41.844915,
+          "stddev_ts": 0.030368,
+          "samples_ns": [
+            3056995502,
+            3058402991,
+            3061346971
+          ],
+          "samples_ts": [
+            41.8712,
+            41.8519,
+            41.8117
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 100
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:29:39.995310+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:29:00Z\",\n    \"avg_ns\": 603818013,\n    \"stddev_ns\": 348133,\n    \"avg_ts\": 211.984448,\n    \"stddev_ts\": 0.122198,\n    \"samples_ns\": [ 603514492, 603741512, 604198035 ],\n    \"samples_ts\": [ 212.091, 212.011, 211.851 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:29:02Z\",\n    \"avg_ns\": 12442476918,\n    \"stddev_ns\": 7776455,\n    \"avg_ts\": 41.149374,\n    \"stddev_ts\": 0.025725,\n    \"samples_ns\": [ 12433778923, 12444894099, 12448757732 ],\n    \"samples_ts\": [ 41.1781, 41.1414, 41.1286 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:29:00Z",
+          "avg_ns": 603818013,
+          "stddev_ns": 348133,
+          "avg_ts": 211.984448,
+          "stddev_ts": 0.122198,
+          "samples_ns": [
+            603514492,
+            603741512,
+            604198035
+          ],
+          "samples_ts": [
+            212.091,
+            212.011,
+            211.851
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:29:02Z",
+          "avg_ns": 12442476918,
+          "stddev_ns": 7776455,
+          "avg_ts": 41.149374,
+          "stddev_ts": 0.025725,
+          "samples_ns": [
+            12433778923,
+            12444894099,
+            12448757732
+          ],
+          "samples_ts": [
+            41.1781,
+            41.1414,
+            41.1286
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 101
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:30:00.059677+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:29:40Z\",\n    \"avg_ns\": 2526000887,\n    \"stddev_ns\": 1185865,\n    \"avg_ts\": 202.691962,\n    \"stddev_ts\": 0.094997,\n    \"samples_ns\": [ 2527070084, 2524728835, 2526203744 ],\n    \"samples_ts\": [ 202.606, 202.794, 202.676 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:29:50Z\",\n    \"avg_ns\": 3057721373,\n    \"stddev_ns\": 1241809,\n    \"avg_ts\": 41.861242,\n    \"stddev_ts\": 0.017004,\n    \"samples_ns\": [ 3058698428, 3058141753, 3056323938 ],\n    \"samples_ts\": [ 41.8479, 41.8555, 41.8804 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:29:40Z",
+          "avg_ns": 2526000887,
+          "stddev_ns": 1185865,
+          "avg_ts": 202.691962,
+          "stddev_ts": 0.094997,
+          "samples_ns": [
+            2527070084,
+            2524728835,
+            2526203744
+          ],
+          "samples_ts": [
+            202.606,
+            202.794,
+            202.676
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:29:50Z",
+          "avg_ns": 3057721373,
+          "stddev_ns": 1241809,
+          "avg_ts": 41.861242,
+          "stddev_ts": 0.017004,
+          "samples_ns": [
+            3058698428,
+            3058141753,
+            3056323938
+          ],
+          "samples_ts": [
+            41.8479,
+            41.8555,
+            41.8804
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 102
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:30:48.277456+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:30:00Z\",\n    \"avg_ns\": 2524466405,\n    \"stddev_ns\": 2291597,\n    \"avg_ts\": 202.815248,\n    \"stddev_ts\": 0.184111,\n    \"samples_ns\": [ 2526000223, 2525565488, 2521833506 ],\n    \"samples_ts\": [ 202.692, 202.727, 203.027 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:30:10Z\",\n    \"avg_ns\": 12442378848,\n    \"stddev_ns\": 4613914,\n    \"avg_ts\": 41.149691,\n    \"stddev_ts\": 0.015261,\n    \"samples_ns\": [ 12446530325, 12437411374, 12443194845 ],\n    \"samples_ts\": [ 41.136, 41.1661, 41.147 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:30:00Z",
+          "avg_ns": 2524466405,
+          "stddev_ns": 2291597,
+          "avg_ts": 202.815248,
+          "stddev_ts": 0.184111,
+          "samples_ns": [
+            2526000223,
+            2525565488,
+            2521833506
+          ],
+          "samples_ts": [
+            202.692,
+            202.727,
+            203.027
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:30:10Z",
+          "avg_ns": 12442378848,
+          "stddev_ns": 4613914,
+          "avg_ts": 41.149691,
+          "stddev_ts": 0.015261,
+          "samples_ns": [
+            12446530325,
+            12437411374,
+            12443194845
+          ],
+          "samples_ts": [
+            41.136,
+            41.1661,
+            41.147
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 103
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:31:00.629359+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:30:48Z\",\n    \"avg_ns\": 604293907,\n    \"stddev_ns\": 369762,\n    \"avg_ts\": 211.817512,\n    \"stddev_ts\": 0.129592,\n    \"samples_ns\": [ 604688817, 603955886, 604237018 ],\n    \"samples_ts\": [ 211.679, 211.936, 211.837 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:30:51Z\",\n    \"avg_ns\": 3048637801,\n    \"stddev_ns\": 701829,\n    \"avg_ts\": 41.985966,\n    \"stddev_ts\": 0.009636,\n    \"samples_ns\": [ 3049303035, 3048702191, 3047908178 ],\n    \"samples_ts\": [ 41.9768, 41.9851, 41.996 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:30:48Z",
+          "avg_ns": 604293907,
+          "stddev_ns": 369762,
+          "avg_ts": 211.817512,
+          "stddev_ts": 0.129592,
+          "samples_ns": [
+            604688817,
+            603955886,
+            604237018
+          ],
+          "samples_ts": [
+            211.679,
+            211.936,
+            211.837
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:30:51Z",
+          "avg_ns": 3048637801,
+          "stddev_ns": 701829,
+          "avg_ts": 41.985966,
+          "stddev_ts": 0.009636,
+          "samples_ns": [
+            3049303035,
+            3048702191,
+            3047908178
+          ],
+          "samples_ts": [
+            41.9768,
+            41.9851,
+            41.996
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 104
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:31:41.212226+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:31:01Z\",\n    \"avg_ns\": 603444571,\n    \"stddev_ns\": 173794,\n    \"avg_ts\": 212.115599,\n    \"stddev_ts\": 0.060467,\n    \"samples_ns\": [ 603338484, 603352150, 603643080 ],\n    \"samples_ts\": [ 212.153, 212.148, 212.046 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:31:03Z\",\n    \"avg_ns\": 12461893815,\n    \"stddev_ns\": 1145782,\n    \"avg_ts\": 41.085248,\n    \"stddev_ts\": 0.003777,\n    \"samples_ns\": [ 12461288502, 12461177641, 12463215302 ],\n    \"samples_ts\": [ 41.0872, 41.0876, 41.0809 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:31:01Z",
+          "avg_ns": 603444571,
+          "stddev_ns": 173794,
+          "avg_ts": 212.115599,
+          "stddev_ts": 0.060467,
+          "samples_ns": [
+            603338484,
+            603352150,
+            603643080
+          ],
+          "samples_ts": [
+            212.153,
+            212.148,
+            212.046
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:31:03Z",
+          "avg_ns": 12461893815,
+          "stddev_ns": 1145782,
+          "avg_ts": 41.085248,
+          "stddev_ts": 0.003777,
+          "samples_ns": [
+            12461288502,
+            12461177641,
+            12463215302
+          ],
+          "samples_ts": [
+            41.0872,
+            41.0876,
+            41.0809
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 105
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:32:01.611334+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:31:41Z\",\n    \"avg_ns\": 2614930019,\n    \"stddev_ns\": 1737526,\n    \"avg_ts\": 195.798796,\n    \"stddev_ts\": 0.130001,\n    \"samples_ns\": [ 2616910142, 2614215128, 2613664788 ],\n    \"samples_ts\": [ 195.651, 195.852, 195.894 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:31:52Z\",\n    \"avg_ns\": 3050724254,\n    \"stddev_ns\": 562394,\n    \"avg_ts\": 41.957251,\n    \"stddev_ts\": 0.007735,\n    \"samples_ns\": [ 3051260144, 3050138655, 3050773963 ],\n    \"samples_ts\": [ 41.9499, 41.9653, 41.9566 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:31:41Z",
+          "avg_ns": 2614930019,
+          "stddev_ns": 1737526,
+          "avg_ts": 195.798796,
+          "stddev_ts": 0.130001,
+          "samples_ns": [
+            2616910142,
+            2614215128,
+            2613664788
+          ],
+          "samples_ts": [
+            195.651,
+            195.852,
+            195.894
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:31:52Z",
+          "avg_ns": 3050724254,
+          "stddev_ns": 562394,
+          "avg_ts": 41.957251,
+          "stddev_ts": 0.007735,
+          "samples_ns": [
+            3051260144,
+            3050138655,
+            3050773963
+          ],
+          "samples_ts": [
+            41.9499,
+            41.9653,
+            41.9566
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 106
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:32:50.145952+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:32:02Z\",\n    \"avg_ns\": 2613378681,\n    \"stddev_ns\": 355463,\n    \"avg_ts\": 195.914970,\n    \"stddev_ts\": 0.026089,\n    \"samples_ns\": [ 2613776703, 2613131587, 2613227755 ],\n    \"samples_ts\": [ 195.885, 195.933, 195.926 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:32:12Z\",\n    \"avg_ns\": 12430610782,\n    \"stddev_ns\": 8682935,\n    \"avg_ts\": 41.188657,\n    \"stddev_ts\": 0.028769,\n    \"samples_ns\": [ 12438825010, 12421527389, 12431479949 ],\n    \"samples_ts\": [ 41.1614, 41.2188, 41.1858 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:32:02Z",
+          "avg_ns": 2613378681,
+          "stddev_ns": 355463,
+          "avg_ts": 195.91497,
+          "stddev_ts": 0.026089,
+          "samples_ns": [
+            2613776703,
+            2613131587,
+            2613227755
+          ],
+          "samples_ts": [
+            195.885,
+            195.933,
+            195.926
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:32:12Z",
+          "avg_ns": 12430610782,
+          "stddev_ns": 8682935,
+          "avg_ts": 41.188657,
+          "stddev_ts": 0.028769,
+          "samples_ns": [
+            12438825010,
+            12421527389,
+            12431479949
+          ],
+          "samples_ts": [
+            41.1614,
+            41.2188,
+            41.1858
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 107
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:33:02.819122+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:32:50Z\",\n    \"avg_ns\": 472272391,\n    \"stddev_ns\": 93295,\n    \"avg_ts\": 271.030036,\n    \"stddev_ts\": 0.052073,\n    \"samples_ns\": [ 472342751, 472304435, 472169988 ],\n    \"samples_ts\": [ 270.99, 271.012, 271.089 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:32:52Z\",\n    \"avg_ns\": 3330254528,\n    \"stddev_ns\": 13958623,\n    \"avg_ts\": 38.435951,\n    \"stddev_ts\": 0.161201,\n    \"samples_ns\": [ 3315659523, 3331629260, 3343474802 ],\n    \"samples_ts\": [ 38.6047, 38.4196, 38.2835 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:32:50Z",
+          "avg_ns": 472272391,
+          "stddev_ns": 93295,
+          "avg_ts": 271.030036,
+          "stddev_ts": 0.052073,
+          "samples_ns": [
+            472342751,
+            472304435,
+            472169988
+          ],
+          "samples_ts": [
+            270.99,
+            271.012,
+            271.089
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:32:52Z",
+          "avg_ns": 3330254528,
+          "stddev_ns": 13958623,
+          "avg_ts": 38.435951,
+          "stddev_ts": 0.161201,
+          "samples_ns": [
+            3315659523,
+            3331629260,
+            3343474802
+          ],
+          "samples_ts": [
+            38.6047,
+            38.4196,
+            38.2835
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 108
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:33:46.065778+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:33:03Z\",\n    \"avg_ns\": 473779682,\n    \"stddev_ns\": 535501,\n    \"avg_ts\": 270.167999,\n    \"stddev_ts\": 0.305265,\n    \"samples_ns\": [ 473958944, 473178010, 474202093 ],\n    \"samples_ts\": [ 270.066, 270.511, 269.927 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:33:05Z\",\n    \"avg_ns\": 13521776771,\n    \"stddev_ns\": 38263472,\n    \"avg_ts\": 37.865049,\n    \"stddev_ts\": 0.107282,\n    \"samples_ns\": [ 13478800051, 13534387215, 13552143049 ],\n    \"samples_ts\": [ 37.9856, 37.8296, 37.78 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:33:03Z",
+          "avg_ns": 473779682,
+          "stddev_ns": 535501,
+          "avg_ts": 270.167999,
+          "stddev_ts": 0.305265,
+          "samples_ns": [
+            473958944,
+            473178010,
+            474202093
+          ],
+          "samples_ts": [
+            270.066,
+            270.511,
+            269.927
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:33:05Z",
+          "avg_ns": 13521776771,
+          "stddev_ns": 38263472,
+          "avg_ts": 37.865049,
+          "stddev_ts": 0.107282,
+          "samples_ns": [
+            13478800051,
+            13534387215,
+            13552143049
+          ],
+          "samples_ts": [
+            37.9856,
+            37.8296,
+            37.78
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 109
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:34:04.617511+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:33:46Z\",\n    \"avg_ns\": 1923342776,\n    \"stddev_ns\": 3936382,\n    \"avg_ts\": 266.203941,\n    \"stddev_ts\": 0.544116,\n    \"samples_ns\": [ 1927884368, 1920924589, 1921219373 ],\n    \"samples_ts\": [ 265.576, 266.538, 266.497 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:33:54Z\",\n    \"avg_ns\": 3341401414,\n    \"stddev_ns\": 63237844,\n    \"avg_ts\": 38.316393,\n    \"stddev_ts\": 0.722307,\n    \"samples_ns\": [ 3283354510, 3332058961, 3408790771 ],\n    \"samples_ts\": [ 38.9845, 38.4147, 37.55 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:33:46Z",
+          "avg_ns": 1923342776,
+          "stddev_ns": 3936382,
+          "avg_ts": 266.203941,
+          "stddev_ts": 0.544116,
+          "samples_ns": [
+            1927884368,
+            1920924589,
+            1921219373
+          ],
+          "samples_ts": [
+            265.576,
+            266.538,
+            266.497
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:33:54Z",
+          "avg_ns": 3341401414,
+          "stddev_ns": 63237844,
+          "avg_ts": 38.316393,
+          "stddev_ts": 0.722307,
+          "samples_ns": [
+            3283354510,
+            3332058961,
+            3408790771
+          ],
+          "samples_ts": [
+            38.9845,
+            38.4147,
+            37.55
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 110
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:34:55.076208+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:34:05Z\",\n    \"avg_ns\": 1919202906,\n    \"stddev_ns\": 4359550,\n    \"avg_ts\": 266.778336,\n    \"stddev_ts\": 0.605145,\n    \"samples_ns\": [ 1924235193, 1916781152, 1916592375 ],\n    \"samples_ts\": [ 266.08, 267.114, 267.141 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:34:13Z\",\n    \"avg_ns\": 13984311366,\n    \"stddev_ns\": 108551428,\n    \"avg_ts\": 36.613933,\n    \"stddev_ts\": 0.285200,\n    \"samples_ns\": [ 13862265302, 14020596140, 14070072656 ],\n    \"samples_ts\": [ 36.9348, 36.5177, 36.3893 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:34:05Z",
+          "avg_ns": 1919202906,
+          "stddev_ns": 4359550,
+          "avg_ts": 266.778336,
+          "stddev_ts": 0.605145,
+          "samples_ns": [
+            1924235193,
+            1916781152,
+            1916592375
+          ],
+          "samples_ts": [
+            266.08,
+            267.114,
+            267.141
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:34:13Z",
+          "avg_ns": 13984311366,
+          "stddev_ns": 108551428,
+          "avg_ts": 36.613933,
+          "stddev_ts": 0.2852,
+          "samples_ns": [
+            13862265302,
+            14020596140,
+            14070072656
+          ],
+          "samples_ts": [
+            36.9348,
+            36.5177,
+            36.3893
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 111
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:35:08.031014+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:34:55Z\",\n    \"avg_ns\": 476679239,\n    \"stddev_ns\": 3711477,\n    \"avg_ts\": 268.535210,\n    \"stddev_ts\": 2.085377,\n    \"samples_ns\": [ 475794978, 480752919, 473489821 ],\n    \"samples_ts\": [ 269.023, 266.249, 270.333 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:34:57Z\",\n    \"avg_ns\": 3418477277,\n    \"stddev_ns\": 62137279,\n    \"avg_ts\": 37.451908,\n    \"stddev_ts\": 0.687945,\n    \"samples_ns\": [ 3346765091, 3456347405, 3452319337 ],\n    \"samples_ts\": [ 38.2459, 37.0333, 37.0765 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:34:55Z",
+          "avg_ns": 476679239,
+          "stddev_ns": 3711477,
+          "avg_ts": 268.53521,
+          "stddev_ts": 2.085377,
+          "samples_ns": [
+            475794978,
+            480752919,
+            473489821
+          ],
+          "samples_ts": [
+            269.023,
+            266.249,
+            270.333
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:34:57Z",
+          "avg_ns": 3418477277,
+          "stddev_ns": 62137279,
+          "avg_ts": 37.451908,
+          "stddev_ts": 0.687945,
+          "samples_ns": [
+            3346765091,
+            3456347405,
+            3452319337
+          ],
+          "samples_ts": [
+            38.2459,
+            37.0333,
+            37.0765
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 112
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:35:52.997932+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:35:08Z\",\n    \"avg_ns\": 473487570,\n    \"stddev_ns\": 1264074,\n    \"avg_ts\": 270.335728,\n    \"stddev_ts\": 0.720534,\n    \"samples_ns\": [ 472555753, 474925963, 472980996 ],\n    \"samples_ts\": [ 270.868, 269.516, 270.624 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:35:10Z\",\n    \"avg_ns\": 14085111092,\n    \"stddev_ns\": 71860879,\n    \"avg_ts\": 36.351072,\n    \"stddev_ts\": 0.185572,\n    \"samples_ns\": [ 14010612518, 14090715007, 14154005753 ],\n    \"samples_ts\": [ 36.5437, 36.336, 36.1735 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:35:08Z",
+          "avg_ns": 473487570,
+          "stddev_ns": 1264074,
+          "avg_ts": 270.335728,
+          "stddev_ts": 0.720534,
+          "samples_ns": [
+            472555753,
+            474925963,
+            472980996
+          ],
+          "samples_ts": [
+            270.868,
+            269.516,
+            270.624
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:35:10Z",
+          "avg_ns": 14085111092,
+          "stddev_ns": 71860879,
+          "avg_ts": 36.351072,
+          "stddev_ts": 0.185572,
+          "samples_ns": [
+            14010612518,
+            14090715007,
+            14154005753
+          ],
+          "samples_ts": [
+            36.5437,
+            36.336,
+            36.1735
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 113
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:36:11.862189+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:35:53Z\",\n    \"avg_ns\": 1963513698,\n    \"stddev_ns\": 7362392,\n    \"avg_ts\": 260.759469,\n    \"stddev_ts\": 0.975771,\n    \"samples_ns\": [ 1971944368, 1960244787, 1958351940 ],\n    \"samples_ts\": [ 259.642, 261.192, 261.444 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:36:01Z\",\n    \"avg_ns\": 3402328550,\n    \"stddev_ns\": 56626339,\n    \"avg_ts\": 37.628309,\n    \"stddev_ts\": 0.632273,\n    \"samples_ns\": [ 3337023040, 3432164369, 3437798241 ],\n    \"samples_ts\": [ 38.3575, 37.2943, 37.2331 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:35:53Z",
+          "avg_ns": 1963513698,
+          "stddev_ns": 7362392,
+          "avg_ts": 260.759469,
+          "stddev_ts": 0.975771,
+          "samples_ns": [
+            1971944368,
+            1960244787,
+            1958351940
+          ],
+          "samples_ts": [
+            259.642,
+            261.192,
+            261.444
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:36:01Z",
+          "avg_ns": 3402328550,
+          "stddev_ns": 56626339,
+          "avg_ts": 37.628309,
+          "stddev_ts": 0.632273,
+          "samples_ns": [
+            3337023040,
+            3432164369,
+            3437798241
+          ],
+          "samples_ts": [
+            38.3575,
+            37.2943,
+            37.2331
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 114
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:37:02.388944+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:36:12Z\",\n    \"avg_ns\": 1964216292,\n    \"stddev_ns\": 2538490,\n    \"avg_ts\": 260.664048,\n    \"stddev_ts\": 0.337023,\n    \"samples_ns\": [ 1965672234, 1961286009, 1965690635 ],\n    \"samples_ts\": [ 260.471, 261.053, 260.468 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:36:20Z\",\n    \"avg_ns\": 13947193409,\n    \"stddev_ns\": 49471544,\n    \"avg_ts\": 36.710203,\n    \"stddev_ts\": 0.130441,\n    \"samples_ns\": [ 13891036218, 13966205739, 13984338271 ],\n    \"samples_ts\": [ 36.8583, 36.6599, 36.6124 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:36:12Z",
+          "avg_ns": 1964216292,
+          "stddev_ns": 2538490,
+          "avg_ts": 260.664048,
+          "stddev_ts": 0.337023,
+          "samples_ns": [
+            1965672234,
+            1961286009,
+            1965690635
+          ],
+          "samples_ts": [
+            260.471,
+            261.053,
+            260.468
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:36:20Z",
+          "avg_ns": 13947193409,
+          "stddev_ns": 49471544,
+          "avg_ts": 36.710203,
+          "stddev_ts": 0.130441,
+          "samples_ns": [
+            13891036218,
+            13966205739,
+            13984338271
+          ],
+          "samples_ts": [
+            36.8583,
+            36.6599,
+            36.6124
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 115
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:37:15.363336+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:37:03Z\",\n    \"avg_ns\": 473565773,\n    \"stddev_ns\": 431312,\n    \"avg_ts\": 270.289952,\n    \"stddev_ts\": 0.245508,\n    \"samples_ns\": [ 473162412, 474018568, 473516341 ],\n    \"samples_ts\": [ 270.52, 270.032, 270.318 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:37:04Z\",\n    \"avg_ns\": 3427880444,\n    \"stddev_ns\": 28062166,\n    \"avg_ts\": 37.342535,\n    \"stddev_ts\": 0.307154,\n    \"samples_ns\": [ 3395477213, 3444013401, 3444150719 ],\n    \"samples_ts\": [ 37.6972, 37.1659, 37.1645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:37:03Z",
+          "avg_ns": 473565773,
+          "stddev_ns": 431312,
+          "avg_ts": 270.289952,
+          "stddev_ts": 0.245508,
+          "samples_ns": [
+            473162412,
+            474018568,
+            473516341
+          ],
+          "samples_ts": [
+            270.52,
+            270.032,
+            270.318
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:37:04Z",
+          "avg_ns": 3427880444,
+          "stddev_ns": 28062166,
+          "avg_ts": 37.342535,
+          "stddev_ts": 0.307154,
+          "samples_ns": [
+            3395477213,
+            3444013401,
+            3444150719
+          ],
+          "samples_ts": [
+            37.6972,
+            37.1659,
+            37.1645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 116
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:38:00.139636+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:37:16Z\",\n    \"avg_ns\": 474004753,\n    \"stddev_ns\": 4203016,\n    \"avg_ts\": 270.053576,\n    \"stddev_ts\": 2.383704,\n    \"samples_ns\": [ 478795989, 472278050, 470940222 ],\n    \"samples_ts\": [ 267.337, 271.027, 271.797 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:37:17Z\",\n    \"avg_ns\": 14017944250,\n    \"stddev_ns\": 48998977,\n    \"avg_ts\": 36.524912,\n    \"stddev_ts\": 0.127875,\n    \"samples_ns\": [ 13962727291, 14056239230, 14034866230 ],\n    \"samples_ts\": [ 36.6691, 36.4251, 36.4806 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:37:16Z",
+          "avg_ns": 474004753,
+          "stddev_ns": 4203016,
+          "avg_ts": 270.053576,
+          "stddev_ts": 2.383704,
+          "samples_ns": [
+            478795989,
+            472278050,
+            470940222
+          ],
+          "samples_ts": [
+            267.337,
+            271.027,
+            271.797
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:37:17Z",
+          "avg_ns": 14017944250,
+          "stddev_ns": 48998977,
+          "avg_ts": 36.524912,
+          "stddev_ts": 0.127875,
+          "samples_ns": [
+            13962727291,
+            14056239230,
+            14034866230
+          ],
+          "samples_ts": [
+            36.6691,
+            36.4251,
+            36.4806
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 117
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:38:19.679707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:38:00Z\",\n    \"avg_ns\": 2125254351,\n    \"stddev_ns\": 1195493,\n    \"avg_ts\": 240.912391,\n    \"stddev_ts\": 0.135353,\n    \"samples_ns\": [ 2123900986, 2125704640, 2126157429 ],\n    \"samples_ts\": [ 241.066, 240.861, 240.81 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:38:09Z\",\n    \"avg_ns\": 3412167823,\n    \"stddev_ns\": 50231097,\n    \"avg_ts\": 37.518276,\n    \"stddev_ts\": 0.557044,\n    \"samples_ns\": [ 3354170021, 3440574308, 3441759141 ],\n    \"samples_ts\": [ 38.1615, 37.2031, 37.1903 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:38:00Z",
+          "avg_ns": 2125254351,
+          "stddev_ns": 1195493,
+          "avg_ts": 240.912391,
+          "stddev_ts": 0.135353,
+          "samples_ns": [
+            2123900986,
+            2125704640,
+            2126157429
+          ],
+          "samples_ts": [
+            241.066,
+            240.861,
+            240.81
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:38:09Z",
+          "avg_ns": 3412167823,
+          "stddev_ns": 50231097,
+          "avg_ts": 37.518276,
+          "stddev_ts": 0.557044,
+          "samples_ns": [
+            3354170021,
+            3440574308,
+            3441759141
+          ],
+          "samples_ts": [
+            38.1615,
+            37.2031,
+            37.1903
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 118
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:39:11.143974+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:38:20Z\",\n    \"avg_ns\": 2098306806,\n    \"stddev_ns\": 2557373,\n    \"avg_ts\": 244.006503,\n    \"stddev_ts\": 0.297133,\n    \"samples_ns\": [ 2101259168, 2096857650, 2096803601 ],\n    \"samples_ts\": [ 243.663, 244.175, 244.181 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:38:28Z\",\n    \"avg_ns\": 14087170731,\n    \"stddev_ns\": 69946572,\n    \"avg_ts\": 36.345725,\n    \"stddev_ts\": 0.180974,\n    \"samples_ns\": [ 14006587665, 14122741456, 14132183073 ],\n    \"samples_ts\": [ 36.5542, 36.2536, 36.2294 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:38:20Z",
+          "avg_ns": 2098306806,
+          "stddev_ns": 2557373,
+          "avg_ts": 244.006503,
+          "stddev_ts": 0.297133,
+          "samples_ns": [
+            2101259168,
+            2096857650,
+            2096803601
+          ],
+          "samples_ts": [
+            243.663,
+            244.175,
+            244.181
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:38:28Z",
+          "avg_ns": 14087170731,
+          "stddev_ns": 69946572,
+          "avg_ts": 36.345725,
+          "stddev_ts": 0.180974,
+          "samples_ns": [
+            14006587665,
+            14122741456,
+            14132183073
+          ],
+          "samples_ts": [
+            36.5542,
+            36.2536,
+            36.2294
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 119
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:39:24.104212+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:39:11Z\",\n    \"avg_ns\": 473317641,\n    \"stddev_ns\": 576167,\n    \"avg_ts\": 270.431767,\n    \"stddev_ts\": 0.328748,\n    \"samples_ns\": [ 472910805, 473976426, 473065693 ],\n    \"samples_ts\": [ 270.664, 270.056, 270.576 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:39:13Z\",\n    \"avg_ns\": 3423347923,\n    \"stddev_ns\": 39596646,\n    \"avg_ts\": 37.393656,\n    \"stddev_ts\": 0.435385,\n    \"samples_ns\": [ 3377698965, 3448412272, 3443932534 ],\n    \"samples_ts\": [ 37.8956, 37.1185, 37.1668 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:39:11Z",
+          "avg_ns": 473317641,
+          "stddev_ns": 576167,
+          "avg_ts": 270.431767,
+          "stddev_ts": 0.328748,
+          "samples_ns": [
+            472910805,
+            473976426,
+            473065693
+          ],
+          "samples_ts": [
+            270.664,
+            270.056,
+            270.576
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:39:13Z",
+          "avg_ns": 3423347923,
+          "stddev_ns": 39596646,
+          "avg_ts": 37.393656,
+          "stddev_ts": 0.435385,
+          "samples_ns": [
+            3377698965,
+            3448412272,
+            3443932534
+          ],
+          "samples_ts": [
+            37.8956,
+            37.1185,
+            37.1668
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 120
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:40:08.930678+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:39:24Z\",\n    \"avg_ns\": 473442567,\n    \"stddev_ns\": 1090638,\n    \"avg_ts\": 270.361099,\n    \"stddev_ts\": 0.622645,\n    \"samples_ns\": [ 472397638, 474573788, 473356275 ],\n    \"samples_ts\": [ 270.958, 269.716, 270.409 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:39:26Z\",\n    \"avg_ns\": 14036424757,\n    \"stddev_ns\": 74333906,\n    \"avg_ts\": 36.477209,\n    \"stddev_ts\": 0.193705,\n    \"samples_ns\": [ 13951616487, 14090282430, 14067375355 ],\n    \"samples_ts\": [ 36.6983, 36.3371, 36.3963 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:39:24Z",
+          "avg_ns": 473442567,
+          "stddev_ns": 1090638,
+          "avg_ts": 270.361099,
+          "stddev_ts": 0.622645,
+          "samples_ns": [
+            472397638,
+            474573788,
+            473356275
+          ],
+          "samples_ts": [
+            270.958,
+            269.716,
+            270.409
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:39:26Z",
+          "avg_ns": 14036424757,
+          "stddev_ns": 74333906,
+          "avg_ts": 36.477209,
+          "stddev_ts": 0.193705,
+          "samples_ns": [
+            13951616487,
+            14090282430,
+            14067375355
+          ],
+          "samples_ts": [
+            36.6983,
+            36.3371,
+            36.3963
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 121
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:40:27.657139+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:40:09Z\",\n    \"avg_ns\": 1940895187,\n    \"stddev_ns\": 3139665,\n    \"avg_ts\": 263.796261,\n    \"stddev_ts\": 0.426560,\n    \"samples_ns\": [ 1944262746, 1938048585, 1940374230 ],\n    \"samples_ts\": [ 263.339, 264.183, 263.867 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:40:17Z\",\n    \"avg_ns\": 3388308839,\n    \"stddev_ns\": 66876154,\n    \"avg_ts\": 37.786851,\n    \"stddev_ts\": 0.751892,\n    \"samples_ns\": [ 3313775001, 3408084423, 3443067094 ],\n    \"samples_ts\": [ 38.6266, 37.5578, 37.1762 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:40:09Z",
+          "avg_ns": 1940895187,
+          "stddev_ns": 3139665,
+          "avg_ts": 263.796261,
+          "stddev_ts": 0.42656,
+          "samples_ns": [
+            1944262746,
+            1938048585,
+            1940374230
+          ],
+          "samples_ts": [
+            263.339,
+            264.183,
+            263.867
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:40:17Z",
+          "avg_ns": 3388308839,
+          "stddev_ns": 66876154,
+          "avg_ts": 37.786851,
+          "stddev_ts": 0.751892,
+          "samples_ns": [
+            3313775001,
+            3408084423,
+            3443067094
+          ],
+          "samples_ts": [
+            38.6266,
+            37.5578,
+            37.1762
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 122
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:41:18.124691+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:40:28Z\",\n    \"avg_ns\": 1923211706,\n    \"stddev_ns\": 2331562,\n    \"avg_ts\": 266.221602,\n    \"stddev_ts\": 0.322857,\n    \"samples_ns\": [ 1924676130, 1920524007, 1924434983 ],\n    \"samples_ts\": [ 266.019, 266.594, 266.052 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:40:36Z\",\n    \"avg_ns\": 13989453486,\n    \"stddev_ns\": 48532239,\n    \"avg_ts\": 36.599294,\n    \"stddev_ts\": 0.127223,\n    \"samples_ns\": [ 13933466484, 14019557895, 14015336080 ],\n    \"samples_ts\": [ 36.7461, 36.5204, 36.5314 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:40:28Z",
+          "avg_ns": 1923211706,
+          "stddev_ns": 2331562,
+          "avg_ts": 266.221602,
+          "stddev_ts": 0.322857,
+          "samples_ns": [
+            1924676130,
+            1920524007,
+            1924434983
+          ],
+          "samples_ts": [
+            266.019,
+            266.594,
+            266.052
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:40:36Z",
+          "avg_ns": 13989453486,
+          "stddev_ns": 48532239,
+          "avg_ts": 36.599294,
+          "stddev_ts": 0.127223,
+          "samples_ns": [
+            13933466484,
+            14019557895,
+            14015336080
+          ],
+          "samples_ts": [
+            36.7461,
+            36.5204,
+            36.5314
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 123
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:41:31.147041+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:41:18Z\",\n    \"avg_ns\": 473013484,\n    \"stddev_ns\": 1061295,\n    \"avg_ts\": 270.606303,\n    \"stddev_ts\": 0.607942,\n    \"samples_ns\": [ 471788337, 473601372, 473650743 ],\n    \"samples_ts\": [ 271.308, 270.269, 270.241 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:41:20Z\",\n    \"avg_ns\": 3435162638,\n    \"stddev_ns\": 48275020,\n    \"avg_ts\": 37.266643,\n    \"stddev_ts\": 0.527557,\n    \"samples_ns\": [ 3380079243, 3470109428, 3455299245 ],\n    \"samples_ts\": [ 37.8689, 36.8864, 37.0445 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:41:18Z",
+          "avg_ns": 473013484,
+          "stddev_ns": 1061295,
+          "avg_ts": 270.606303,
+          "stddev_ts": 0.607942,
+          "samples_ns": [
+            471788337,
+            473601372,
+            473650743
+          ],
+          "samples_ts": [
+            271.308,
+            270.269,
+            270.241
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:41:20Z",
+          "avg_ns": 3435162638,
+          "stddev_ns": 48275020,
+          "avg_ts": 37.266643,
+          "stddev_ts": 0.527557,
+          "samples_ns": [
+            3380079243,
+            3470109428,
+            3455299245
+          ],
+          "samples_ts": [
+            37.8689,
+            36.8864,
+            37.0445
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 124
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:42:15.968904+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:41:31Z\",\n    \"avg_ns\": 472972843,\n    \"stddev_ns\": 835572,\n    \"avg_ts\": 270.629209,\n    \"stddev_ts\": 0.478390,\n    \"samples_ns\": [ 472017143, 473338063, 473563324 ],\n    \"samples_ts\": [ 271.177, 270.42, 270.291 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:41:33Z\",\n    \"avg_ns\": 14042868853,\n    \"stddev_ns\": 52734976,\n    \"avg_ts\": 36.460130,\n    \"stddev_ts\": 0.137176,\n    \"samples_ns\": [ 13982878244, 14063820116, 14081908201 ],\n    \"samples_ts\": [ 36.6162, 36.4055, 36.3587 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:41:31Z",
+          "avg_ns": 472972843,
+          "stddev_ns": 835572,
+          "avg_ts": 270.629209,
+          "stddev_ts": 0.47839,
+          "samples_ns": [
+            472017143,
+            473338063,
+            473563324
+          ],
+          "samples_ts": [
+            271.177,
+            270.42,
+            270.291
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:41:33Z",
+          "avg_ns": 14042868853,
+          "stddev_ns": 52734976,
+          "avg_ts": 36.46013,
+          "stddev_ts": 0.137176,
+          "samples_ns": [
+            13982878244,
+            14063820116,
+            14081908201
+          ],
+          "samples_ts": [
+            36.6162,
+            36.4055,
+            36.3587
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 125
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:42:34.811555+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:42:16Z\",\n    \"avg_ns\": 1962041324,\n    \"stddev_ns\": 1851195,\n    \"avg_ts\": 260.952865,\n    \"stddev_ts\": 0.246226,\n    \"samples_ns\": [ 1960150707, 1963850403, 1962122862 ],\n    \"samples_ts\": [ 261.204, 260.712, 260.942 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:42:24Z\",\n    \"avg_ns\": 3397784758,\n    \"stddev_ns\": 64955245,\n    \"avg_ts\": 37.680886,\n    \"stddev_ts\": 0.728380,\n    \"samples_ns\": [ 3322783218, 3435792728, 3434778329 ],\n    \"samples_ts\": [ 38.5219, 37.2549, 37.2659 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:42:16Z",
+          "avg_ns": 1962041324,
+          "stddev_ns": 1851195,
+          "avg_ts": 260.952865,
+          "stddev_ts": 0.246226,
+          "samples_ns": [
+            1960150707,
+            1963850403,
+            1962122862
+          ],
+          "samples_ts": [
+            261.204,
+            260.712,
+            260.942
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:42:24Z",
+          "avg_ns": 3397784758,
+          "stddev_ns": 64955245,
+          "avg_ts": 37.680886,
+          "stddev_ts": 0.72838,
+          "samples_ns": [
+            3322783218,
+            3435792728,
+            3434778329
+          ],
+          "samples_ts": [
+            38.5219,
+            37.2549,
+            37.2659
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 126
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:43:25.405880+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:42:35Z\",\n    \"avg_ns\": 1959691577,\n    \"stddev_ns\": 3365977,\n    \"avg_ts\": 261.266116,\n    \"stddev_ts\": 0.448418,\n    \"samples_ns\": [ 1956891248, 1963425508, 1958757976 ],\n    \"samples_ts\": [ 261.639, 260.769, 261.39 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:42:43Z\",\n    \"avg_ns\": 13984182371,\n    \"stddev_ns\": 109475959,\n    \"avg_ts\": 36.614296,\n    \"stddev_ts\": 0.287706,\n    \"samples_ns\": [ 13860366665, 14068163393, 14024017056 ],\n    \"samples_ts\": [ 36.9399, 36.3942, 36.5088 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:42:35Z",
+          "avg_ns": 1959691577,
+          "stddev_ns": 3365977,
+          "avg_ts": 261.266116,
+          "stddev_ts": 0.448418,
+          "samples_ns": [
+            1956891248,
+            1963425508,
+            1958757976
+          ],
+          "samples_ts": [
+            261.639,
+            260.769,
+            261.39
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:42:43Z",
+          "avg_ns": 13984182371,
+          "stddev_ns": 109475959,
+          "avg_ts": 36.614296,
+          "stddev_ts": 0.287706,
+          "samples_ns": [
+            13860366665,
+            14068163393,
+            14024017056
+          ],
+          "samples_ts": [
+            36.9399,
+            36.3942,
+            36.5088
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 127
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:43:38.377503+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:43:26Z\",\n    \"avg_ns\": 472515124,\n    \"stddev_ns\": 311095,\n    \"avg_ts\": 270.890878,\n    \"stddev_ts\": 0.178342,\n    \"samples_ns\": [ 472501266, 472832917, 472211189 ],\n    \"samples_ts\": [ 270.899, 270.709, 271.065 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:43:27Z\",\n    \"avg_ns\": 3423665683,\n    \"stddev_ns\": 57128957,\n    \"avg_ts\": 37.393837,\n    \"stddev_ts\": 0.630039,\n    \"samples_ns\": [ 3357701568, 3457160070, 3456135413 ],\n    \"samples_ts\": [ 38.1213, 37.0246, 37.0356 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:43:26Z",
+          "avg_ns": 472515124,
+          "stddev_ns": 311095,
+          "avg_ts": 270.890878,
+          "stddev_ts": 0.178342,
+          "samples_ns": [
+            472501266,
+            472832917,
+            472211189
+          ],
+          "samples_ts": [
+            270.899,
+            270.709,
+            271.065
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:43:27Z",
+          "avg_ns": 3423665683,
+          "stddev_ns": 57128957,
+          "avg_ts": 37.393837,
+          "stddev_ts": 0.630039,
+          "samples_ns": [
+            3357701568,
+            3457160070,
+            3456135413
+          ],
+          "samples_ts": [
+            38.1213,
+            37.0246,
+            37.0356
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 128
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:44:22.835130+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:43:39Z\",\n    \"avg_ns\": 473152486,\n    \"stddev_ns\": 454264,\n    \"avg_ts\": 270.526062,\n    \"stddev_ts\": 0.259584,\n    \"samples_ns\": [ 472872947, 473676638, 472907873 ],\n    \"samples_ts\": [ 270.686, 270.227, 270.666 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:43:40Z\",\n    \"avg_ns\": 13921777851,\n    \"stddev_ns\": 56132013,\n    \"avg_ts\": 36.777311,\n    \"stddev_ts\": 0.148588,\n    \"samples_ns\": [ 13857862719, 13944411743, 13963059092 ],\n    \"samples_ts\": [ 36.9465, 36.7172, 36.6682 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:43:39Z",
+          "avg_ns": 473152486,
+          "stddev_ns": 454264,
+          "avg_ts": 270.526062,
+          "stddev_ts": 0.259584,
+          "samples_ns": [
+            472872947,
+            473676638,
+            472907873
+          ],
+          "samples_ts": [
+            270.686,
+            270.227,
+            270.666
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:43:40Z",
+          "avg_ns": 13921777851,
+          "stddev_ns": 56132013,
+          "avg_ts": 36.777311,
+          "stddev_ts": 0.148588,
+          "samples_ns": [
+            13857862719,
+            13944411743,
+            13963059092
+          ],
+          "samples_ts": [
+            36.9465,
+            36.7172,
+            36.6682
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 129
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:44:42.315942+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:44:23Z\",\n    \"avg_ns\": 2124024057,\n    \"stddev_ns\": 2262156,\n    \"avg_ts\": 241.052066,\n    \"stddev_ts\": 0.256653,\n    \"samples_ns\": [ 2126344363, 2121825944, 2123901865 ],\n    \"samples_ts\": [ 240.789, 241.302, 241.066 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:44:32Z\",\n    \"avg_ns\": 3389563409,\n    \"stddev_ns\": 73083651,\n    \"avg_ts\": 37.774821,\n    \"stddev_ts\": 0.824075,\n    \"samples_ns\": [ 3305805796, 3422514177, 3440370256 ],\n    \"samples_ts\": [ 38.7198, 37.3994, 37.2053 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:44:23Z",
+          "avg_ns": 2124024057,
+          "stddev_ns": 2262156,
+          "avg_ts": 241.052066,
+          "stddev_ts": 0.256653,
+          "samples_ns": [
+            2126344363,
+            2121825944,
+            2123901865
+          ],
+          "samples_ts": [
+            240.789,
+            241.302,
+            241.066
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:44:32Z",
+          "avg_ns": 3389563409,
+          "stddev_ns": 73083651,
+          "avg_ts": 37.774821,
+          "stddev_ts": 0.824075,
+          "samples_ns": [
+            3305805796,
+            3422514177,
+            3440370256
+          ],
+          "samples_ts": [
+            38.7198,
+            37.3994,
+            37.2053
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 130
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:45:33.471087+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:44:43Z\",\n    \"avg_ns\": 2099559505,\n    \"stddev_ns\": 4797241,\n    \"avg_ts\": 243.861524,\n    \"stddev_ts\": 0.556550,\n    \"samples_ns\": [ 2105000186, 2097739537, 2095938793 ],\n    \"samples_ts\": [ 243.23, 244.072, 244.282 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:44:51Z\",\n    \"avg_ns\": 13982969198,\n    \"stddev_ns\": 95423452,\n    \"avg_ts\": 36.617112,\n    \"stddev_ts\": 0.250819,\n    \"samples_ns\": [ 13873467317, 14048333285, 14027106992 ],\n    \"samples_ts\": [ 36.905, 36.4456, 36.5008 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:44:43Z",
+          "avg_ns": 2099559505,
+          "stddev_ns": 4797241,
+          "avg_ts": 243.861524,
+          "stddev_ts": 0.55655,
+          "samples_ns": [
+            2105000186,
+            2097739537,
+            2095938793
+          ],
+          "samples_ts": [
+            243.23,
+            244.072,
+            244.282
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:44:51Z",
+          "avg_ns": 13982969198,
+          "stddev_ns": 95423452,
+          "avg_ts": 36.617112,
+          "stddev_ts": 0.250819,
+          "samples_ns": [
+            13873467317,
+            14048333285,
+            14027106992
+          ],
+          "samples_ts": [
+            36.905,
+            36.4456,
+            36.5008
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 131
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:45:46.435318+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:45:34Z\",\n    \"avg_ns\": 472959738,\n    \"stddev_ns\": 941764,\n    \"avg_ts\": 270.636860,\n    \"stddev_ts\": 0.539133,\n    \"samples_ns\": [ 473100826, 473822999, 471955389 ],\n    \"samples_ts\": [ 270.555, 270.143, 271.212 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:45:36Z\",\n    \"avg_ns\": 3425115663,\n    \"stddev_ns\": 50388790,\n    \"avg_ts\": 37.376434,\n    \"stddev_ts\": 0.553961,\n    \"samples_ns\": [ 3367805980, 3462472630, 3445068380 ],\n    \"samples_ts\": [ 38.0069, 36.9678, 37.1546 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:45:34Z",
+          "avg_ns": 472959738,
+          "stddev_ns": 941764,
+          "avg_ts": 270.63686,
+          "stddev_ts": 0.539133,
+          "samples_ns": [
+            473100826,
+            473822999,
+            471955389
+          ],
+          "samples_ts": [
+            270.555,
+            270.143,
+            271.212
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:45:36Z",
+          "avg_ns": 3425115663,
+          "stddev_ns": 50388790,
+          "avg_ts": 37.376434,
+          "stddev_ts": 0.553961,
+          "samples_ns": [
+            3367805980,
+            3462472630,
+            3445068380
+          ],
+          "samples_ts": [
+            38.0069,
+            36.9678,
+            37.1546
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 132
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:46:31.272406+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:45:47Z\",\n    \"avg_ns\": 472844619,\n    \"stddev_ns\": 593967,\n    \"avg_ts\": 270.702318,\n    \"stddev_ts\": 0.339892,\n    \"samples_ns\": [ 472692538, 473499841, 472341478 ],\n    \"samples_ts\": [ 270.789, 270.327, 270.99 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:45:49Z\",\n    \"avg_ns\": 14048287399,\n    \"stddev_ns\": 65913155,\n    \"avg_ts\": 36.446260,\n    \"stddev_ts\": 0.171458,\n    \"samples_ns\": [ 13972322920, 14090339574, 14082199705 ],\n    \"samples_ts\": [ 36.6439, 36.337, 36.358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:45:47Z",
+          "avg_ns": 472844619,
+          "stddev_ns": 593967,
+          "avg_ts": 270.702318,
+          "stddev_ts": 0.339892,
+          "samples_ns": [
+            472692538,
+            473499841,
+            472341478
+          ],
+          "samples_ts": [
+            270.789,
+            270.327,
+            270.99
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:45:49Z",
+          "avg_ns": 14048287399,
+          "stddev_ns": 65913155,
+          "avg_ts": 36.44626,
+          "stddev_ts": 0.171458,
+          "samples_ns": [
+            13972322920,
+            14090339574,
+            14082199705
+          ],
+          "samples_ts": [
+            36.6439,
+            36.337,
+            36.358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 133
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:46:50.048556+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:46:31Z\",\n    \"avg_ns\": 1940666402,\n    \"stddev_ns\": 2220568,\n    \"avg_ts\": 263.827130,\n    \"stddev_ts\": 0.301623,\n    \"samples_ns\": [ 1939271492, 1939501157, 1943226558 ],\n    \"samples_ts\": [ 264.017, 263.985, 263.479 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:46:39Z\",\n    \"avg_ns\": 3403626371,\n    \"stddev_ns\": 68964006,\n    \"avg_ts\": 37.617362,\n    \"stddev_ts\": 0.770944,\n    \"samples_ns\": [ 3324273820, 3437522785, 3449082510 ],\n    \"samples_ts\": [ 38.5047, 37.2361, 37.1113 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:46:31Z",
+          "avg_ns": 1940666402,
+          "stddev_ns": 2220568,
+          "avg_ts": 263.82713,
+          "stddev_ts": 0.301623,
+          "samples_ns": [
+            1939271492,
+            1939501157,
+            1943226558
+          ],
+          "samples_ts": [
+            264.017,
+            263.985,
+            263.479
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:46:39Z",
+          "avg_ns": 3403626371,
+          "stddev_ns": 68964006,
+          "avg_ts": 37.617362,
+          "stddev_ts": 0.770944,
+          "samples_ns": [
+            3324273820,
+            3437522785,
+            3449082510
+          ],
+          "samples_ts": [
+            38.5047,
+            37.2361,
+            37.1113
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 134
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:47:40.438423+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:46:50Z\",\n    \"avg_ns\": 1924382167,\n    \"stddev_ns\": 3827927,\n    \"avg_ts\": 266.060119,\n    \"stddev_ts\": 0.528811,\n    \"samples_ns\": [ 1921103085, 1923455604, 1928587814 ],\n    \"samples_ts\": [ 266.514, 266.188, 265.479 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:46:58Z\",\n    \"avg_ns\": 13962419575,\n    \"stddev_ns\": 44877260,\n    \"avg_ts\": 36.670115,\n    \"stddev_ts\": 0.117998,\n    \"samples_ns\": [ 13912965009, 13973743914, 14000549803 ],\n    \"samples_ts\": [ 36.8002, 36.6401, 36.57 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:46:50Z",
+          "avg_ns": 1924382167,
+          "stddev_ns": 3827927,
+          "avg_ts": 266.060119,
+          "stddev_ts": 0.528811,
+          "samples_ns": [
+            1921103085,
+            1923455604,
+            1928587814
+          ],
+          "samples_ts": [
+            266.514,
+            266.188,
+            265.479
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:46:58Z",
+          "avg_ns": 13962419575,
+          "stddev_ns": 44877260,
+          "avg_ts": 36.670115,
+          "stddev_ts": 0.117998,
+          "samples_ns": [
+            13912965009,
+            13973743914,
+            14000549803
+          ],
+          "samples_ts": [
+            36.8002,
+            36.6401,
+            36.57
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 135
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:47:53.388566+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:47:41Z\",\n    \"avg_ns\": 473106841,\n    \"stddev_ns\": 651441,\n    \"avg_ts\": 270.552337,\n    \"stddev_ts\": 0.372417,\n    \"samples_ns\": [ 473182805, 473716606, 472421113 ],\n    \"samples_ts\": [ 270.509, 270.204, 270.945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:47:43Z\",\n    \"avg_ns\": 3419979192,\n    \"stddev_ns\": 49098943,\n    \"avg_ts\": 37.432314,\n    \"stddev_ts\": 0.541867,\n    \"samples_ns\": [ 3363315174, 3446701489, 3449920915 ],\n    \"samples_ts\": [ 38.0577, 37.137, 37.1023 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:47:41Z",
+          "avg_ns": 473106841,
+          "stddev_ns": 651441,
+          "avg_ts": 270.552337,
+          "stddev_ts": 0.372417,
+          "samples_ns": [
+            473182805,
+            473716606,
+            472421113
+          ],
+          "samples_ts": [
+            270.509,
+            270.204,
+            270.945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:47:43Z",
+          "avg_ns": 3419979192,
+          "stddev_ns": 49098943,
+          "avg_ts": 37.432314,
+          "stddev_ts": 0.541867,
+          "samples_ns": [
+            3363315174,
+            3446701489,
+            3449920915
+          ],
+          "samples_ts": [
+            38.0577,
+            37.137,
+            37.1023
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 136
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:48:38.220961+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:47:54Z\",\n    \"avg_ns\": 472583109,\n    \"stddev_ns\": 183122,\n    \"avg_ts\": 270.851857,\n    \"stddev_ts\": 0.104189,\n    \"samples_ns\": [ 472455127, 472791241, 472502960 ],\n    \"samples_ts\": [ 270.925, 270.733, 270.898 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:47:55Z\",\n    \"avg_ns\": 14047652041,\n    \"stddev_ns\": 40239255,\n    \"avg_ts\": 36.447571,\n    \"stddev_ts\": 0.104238,\n    \"samples_ns\": [ 14020673248, 14028380085, 14093902790 ],\n    \"samples_ts\": [ 36.5175, 36.4974, 36.3278 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:47:54Z",
+          "avg_ns": 472583109,
+          "stddev_ns": 183122,
+          "avg_ts": 270.851857,
+          "stddev_ts": 0.104189,
+          "samples_ns": [
+            472455127,
+            472791241,
+            472502960
+          ],
+          "samples_ts": [
+            270.925,
+            270.733,
+            270.898
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:47:55Z",
+          "avg_ns": 14047652041,
+          "stddev_ns": 40239255,
+          "avg_ts": 36.447571,
+          "stddev_ts": 0.104238,
+          "samples_ns": [
+            14020673248,
+            14028380085,
+            14093902790
+          ],
+          "samples_ts": [
+            36.5175,
+            36.4974,
+            36.3278
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 137
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:48:57.052066+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:48:38Z\",\n    \"avg_ns\": 1959070334,\n    \"stddev_ns\": 1800020,\n    \"avg_ts\": 261.348599,\n    \"stddev_ts\": 0.240089,\n    \"samples_ns\": [ 1960778569, 1959240700, 1957191734 ],\n    \"samples_ts\": [ 261.121, 261.326, 261.599 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:48:46Z\",\n    \"avg_ns\": 3387484431,\n    \"stddev_ns\": 73015738,\n    \"avg_ts\": 37.798005,\n    \"stddev_ts\": 0.824905,\n    \"samples_ns\": [ 3303248656, 3426512776, 3432691862 ],\n    \"samples_ts\": [ 38.7497, 37.3558, 37.2885 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:48:38Z",
+          "avg_ns": 1959070334,
+          "stddev_ns": 1800020,
+          "avg_ts": 261.348599,
+          "stddev_ts": 0.240089,
+          "samples_ns": [
+            1960778569,
+            1959240700,
+            1957191734
+          ],
+          "samples_ts": [
+            261.121,
+            261.326,
+            261.599
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:48:46Z",
+          "avg_ns": 3387484431,
+          "stddev_ns": 73015738,
+          "avg_ts": 37.798005,
+          "stddev_ts": 0.824905,
+          "samples_ns": [
+            3303248656,
+            3426512776,
+            3432691862
+          ],
+          "samples_ts": [
+            38.7497,
+            37.3558,
+            37.2885
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 138
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:49:47.614359+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:48:57Z\",\n    \"avg_ns\": 1961089948,\n    \"stddev_ns\": 2301143,\n    \"avg_ts\": 261.079544,\n    \"stddev_ts\": 0.306076,\n    \"samples_ns\": [ 1960312870, 1959278840, 1963678136 ],\n    \"samples_ts\": [ 261.183, 261.321, 260.735 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:49:05Z\",\n    \"avg_ns\": 13973516072,\n    \"stddev_ns\": 28687018,\n    \"avg_ts\": 36.640845,\n    \"stddev_ts\": 0.075221,\n    \"samples_ns\": [ 13944869379, 13973436077, 14002242761 ],\n    \"samples_ts\": [ 36.716, 36.641, 36.5656 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:48:57Z",
+          "avg_ns": 1961089948,
+          "stddev_ns": 2301143,
+          "avg_ts": 261.079544,
+          "stddev_ts": 0.306076,
+          "samples_ns": [
+            1960312870,
+            1959278840,
+            1963678136
+          ],
+          "samples_ts": [
+            261.183,
+            261.321,
+            260.735
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:49:05Z",
+          "avg_ns": 13973516072,
+          "stddev_ns": 28687018,
+          "avg_ts": 36.640845,
+          "stddev_ts": 0.075221,
+          "samples_ns": [
+            13944869379,
+            13973436077,
+            14002242761
+          ],
+          "samples_ts": [
+            36.716,
+            36.641,
+            36.5656
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 139
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:50:00.611210+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:49:48Z\",\n    \"avg_ns\": 472656785,\n    \"stddev_ns\": 989989,\n    \"avg_ts\": 270.810402,\n    \"stddev_ts\": 0.566845,\n    \"samples_ns\": [ 473673301, 471696691, 472600365 ],\n    \"samples_ts\": [ 270.228, 271.361, 270.842 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:49:50Z\",\n    \"avg_ns\": 3434121106,\n    \"stddev_ns\": 49560230,\n    \"avg_ts\": 37.278220,\n    \"stddev_ts\": 0.542464,\n    \"samples_ns\": [ 3376957605, 3460365432, 3465040282 ],\n    \"samples_ts\": [ 37.9039, 36.9903, 36.9404 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:49:48Z",
+          "avg_ns": 472656785,
+          "stddev_ns": 989989,
+          "avg_ts": 270.810402,
+          "stddev_ts": 0.566845,
+          "samples_ns": [
+            473673301,
+            471696691,
+            472600365
+          ],
+          "samples_ts": [
+            270.228,
+            271.361,
+            270.842
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:49:50Z",
+          "avg_ns": 3434121106,
+          "stddev_ns": 49560230,
+          "avg_ts": 37.27822,
+          "stddev_ts": 0.542464,
+          "samples_ns": [
+            3376957605,
+            3460365432,
+            3465040282
+          ],
+          "samples_ts": [
+            37.9039,
+            36.9903,
+            36.9404
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 140
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:50:45.653575+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:50:01Z\",\n    \"avg_ns\": 472370038,\n    \"stddev_ns\": 1280679,\n    \"avg_ts\": 270.975331,\n    \"stddev_ts\": 0.735291,\n    \"samples_ns\": [ 473448129, 470954661, 472707325 ],\n    \"samples_ts\": [ 270.357, 271.788, 270.781 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:50:03Z\",\n    \"avg_ns\": 14113972922,\n    \"stddev_ns\": 57762461,\n    \"avg_ts\": 36.276513,\n    \"stddev_ts\": 0.148646,\n    \"samples_ns\": [ 14051171598, 14125918580, 14164828588 ],\n    \"samples_ts\": [ 36.4382, 36.2454, 36.1459 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:50:01Z",
+          "avg_ns": 472370038,
+          "stddev_ns": 1280679,
+          "avg_ts": 270.975331,
+          "stddev_ts": 0.735291,
+          "samples_ns": [
+            473448129,
+            470954661,
+            472707325
+          ],
+          "samples_ts": [
+            270.357,
+            271.788,
+            270.781
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:50:03Z",
+          "avg_ns": 14113972922,
+          "stddev_ns": 57762461,
+          "avg_ts": 36.276513,
+          "stddev_ts": 0.148646,
+          "samples_ns": [
+            14051171598,
+            14125918580,
+            14164828588
+          ],
+          "samples_ts": [
+            36.4382,
+            36.2454,
+            36.1459
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 141
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:51:04.929843+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:50:46Z\",\n    \"avg_ns\": 2073749664,\n    \"stddev_ns\": 2969593,\n    \"avg_ts\": 246.896098,\n    \"stddev_ts\": 0.353742,\n    \"samples_ns\": [ 2074545044, 2076240575, 2070463373 ],\n    \"samples_ts\": [ 246.801, 246.6, 247.288 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:50:54Z\",\n    \"avg_ns\": 3387569226,\n    \"stddev_ns\": 51313820,\n    \"avg_ts\": 37.791036,\n    \"stddev_ts\": 0.577366,\n    \"samples_ns\": [ 3328490586, 3421031622, 3413185471 ],\n    \"samples_ts\": [ 38.4559, 37.4156, 37.5016 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:50:46Z",
+          "avg_ns": 2073749664,
+          "stddev_ns": 2969593,
+          "avg_ts": 246.896098,
+          "stddev_ts": 0.353742,
+          "samples_ns": [
+            2074545044,
+            2076240575,
+            2070463373
+          ],
+          "samples_ts": [
+            246.801,
+            246.6,
+            247.288
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:50:54Z",
+          "avg_ns": 3387569226,
+          "stddev_ns": 51313820,
+          "avg_ts": 37.791036,
+          "stddev_ts": 0.577366,
+          "samples_ns": [
+            3328490586,
+            3421031622,
+            3413185471
+          ],
+          "samples_ts": [
+            38.4559,
+            37.4156,
+            37.5016
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 142
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:51:56.089420+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:51:05Z\",\n    \"avg_ns\": 2065794507,\n    \"stddev_ns\": 2369173,\n    \"avg_ts\": 247.846747,\n    \"stddev_ts\": 0.284107,\n    \"samples_ns\": [ 2063883795, 2068445407, 2065054319 ],\n    \"samples_ts\": [ 248.076, 247.529, 247.935 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 270M Q4_K - Medium\",\n    \"model_size\": 246587904,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:51:13Z\",\n    \"avg_ns\": 14028467314,\n    \"stddev_ns\": 61347538,\n    \"avg_ts\": 36.497682,\n    \"stddev_ts\": 0.159983,\n    \"samples_ns\": [ 13958181865, 14055964181, 14071255898 ],\n    \"samples_ts\": [ 36.681, 36.4258, 36.3862 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:51:05Z",
+          "avg_ns": 2065794507,
+          "stddev_ns": 2369173,
+          "avg_ts": 247.846747,
+          "stddev_ts": 0.284107,
+          "samples_ns": [
+            2063883795,
+            2068445407,
+            2065054319
+          ],
+          "samples_ts": [
+            248.076,
+            247.529,
+            247.935
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+          "model_type": "gemma3 270M Q4_K - Medium",
+          "model_size": 246587904,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:51:13Z",
+          "avg_ns": 14028467314,
+          "stddev_ns": 61347538,
+          "avg_ts": 36.497682,
+          "stddev_ts": 0.159983,
+          "samples_ns": [
+            13958181865,
+            14055964181,
+            14071255898
+          ],
+          "samples_ts": [
+            36.681,
+            36.4258,
+            36.3862
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 143
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:52:15.723824+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:52:00Z\",\n    \"avg_ns\": 1086425396,\n    \"stddev_ns\": 201664,\n    \"avg_ts\": 117.817572,\n    \"stddev_ts\": 0.021276,\n    \"samples_ns\": [ 1086367656, 1086643991, 1086264543 ],\n    \"samples_ts\": [ 117.824, 117.794, 117.835 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:52:04Z\",\n    \"avg_ns\": 3721992773,\n    \"stddev_ns\": 322151,\n    \"avg_ts\": 34.390180,\n    \"stddev_ts\": 0.002923,\n    \"samples_ns\": [ 3722266054, 3722066009, 3721646257 ],\n    \"samples_ts\": [ 34.3877, 34.3895, 34.3934 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:52:00Z",
+          "avg_ns": 1086425396,
+          "stddev_ns": 201664,
+          "avg_ts": 117.817572,
+          "stddev_ts": 0.021276,
+          "samples_ns": [
+            1086367656,
+            1086643991,
+            1086264543
+          ],
+          "samples_ts": [
+            117.824,
+            117.794,
+            117.835
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:52:04Z",
+          "avg_ns": 3721992773,
+          "stddev_ns": 322151,
+          "avg_ts": 34.39018,
+          "stddev_ts": 0.002923,
+          "samples_ns": [
+            3722266054,
+            3722066009,
+            3721646257
+          ],
+          "samples_ts": [
+            34.3877,
+            34.3895,
+            34.3934
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 144
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:53:07.092812+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:52:16Z\",\n    \"avg_ns\": 1087206228,\n    \"stddev_ns\": 104540,\n    \"avg_ts\": 117.732954,\n    \"stddev_ts\": 0.011321,\n    \"samples_ns\": [ 1087308248, 1087099338, 1087211098 ],\n    \"samples_ts\": [ 117.722, 117.745, 117.732 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:52:20Z\",\n    \"avg_ns\": 15395769689,\n    \"stddev_ns\": 6380086,\n    \"avg_ts\": 33.255892,\n    \"stddev_ts\": 0.013784,\n    \"samples_ns\": [ 15401081936, 15388693099, 15397534032 ],\n    \"samples_ts\": [ 33.2444, 33.2712, 33.2521 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:52:16Z",
+          "avg_ns": 1087206228,
+          "stddev_ns": 104540,
+          "avg_ts": 117.732954,
+          "stddev_ts": 0.011321,
+          "samples_ns": [
+            1087308248,
+            1087099338,
+            1087211098
+          ],
+          "samples_ts": [
+            117.722,
+            117.745,
+            117.732
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:52:20Z",
+          "avg_ns": 15395769689,
+          "stddev_ns": 6380086,
+          "avg_ts": 33.255892,
+          "stddev_ts": 0.013784,
+          "samples_ns": [
+            15401081936,
+            15388693099,
+            15397534032
+          ],
+          "samples_ts": [
+            33.2444,
+            33.2712,
+            33.2521
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 145
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:53:37.466148+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:53:07Z\",\n    \"avg_ns\": 4542619115,\n    \"stddev_ns\": 351969,\n    \"avg_ts\": 112.710309,\n    \"stddev_ts\": 0.008572,\n    \"samples_ns\": [ 4542905547, 4542716332, 4542235467 ],\n    \"samples_ts\": [ 112.703, 112.708, 112.72 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:53:25Z\",\n    \"avg_ns\": 3805891497,\n    \"stddev_ns\": 3752215,\n    \"avg_ts\": 33.632089,\n    \"stddev_ts\": 0.033134,\n    \"samples_ns\": [ 3803691478, 3810223419, 3803759595 ],\n    \"samples_ts\": [ 33.6515, 33.5938, 33.6509 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:53:07Z",
+          "avg_ns": 4542619115,
+          "stddev_ns": 351969,
+          "avg_ts": 112.710309,
+          "stddev_ts": 0.008572,
+          "samples_ns": [
+            4542905547,
+            4542716332,
+            4542235467
+          ],
+          "samples_ts": [
+            112.703,
+            112.708,
+            112.72
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:53:25Z",
+          "avg_ns": 3805891497,
+          "stddev_ns": 3752215,
+          "avg_ts": 33.632089,
+          "stddev_ts": 0.033134,
+          "samples_ns": [
+            3803691478,
+            3810223419,
+            3803759595
+          ],
+          "samples_ts": [
+            33.6515,
+            33.5938,
+            33.6509
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 146
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:54:42.763249+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:53:38Z\",\n    \"avg_ns\": 4543284506,\n    \"stddev_ns\": 778774,\n    \"avg_ts\": 112.693803,\n    \"stddev_ts\": 0.019171,\n    \"samples_ns\": [ 4543052377, 4542654254, 4544146889 ],\n    \"samples_ts\": [ 112.7, 112.709, 112.672 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:53:56Z\",\n    \"avg_ns\": 15445071260,\n    \"stddev_ns\": 40062574,\n    \"avg_ts\": 33.149883,\n    \"stddev_ts\": 0.086115,\n    \"samples_ns\": [ 15398823538, 15469120093, 15467270150 ],\n    \"samples_ts\": [ 33.2493, 33.0982, 33.1022 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:53:38Z",
+          "avg_ns": 4543284506,
+          "stddev_ns": 778774,
+          "avg_ts": 112.693803,
+          "stddev_ts": 0.019171,
+          "samples_ns": [
+            4543052377,
+            4542654254,
+            4544146889
+          ],
+          "samples_ts": [
+            112.7,
+            112.709,
+            112.672
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:53:56Z",
+          "avg_ns": 15445071260,
+          "stddev_ns": 40062574,
+          "avg_ts": 33.149883,
+          "stddev_ts": 0.086115,
+          "samples_ns": [
+            15398823538,
+            15469120093,
+            15467270150
+          ],
+          "samples_ts": [
+            33.2493,
+            33.0982,
+            33.1022
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 147
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:54:59.018311+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:54:43Z\",\n    \"avg_ns\": 1086727154,\n    \"stddev_ns\": 299756,\n    \"avg_ts\": 117.784861,\n    \"stddev_ts\": 0.032097,\n    \"samples_ns\": [ 1086396689, 1086816396, 1086968379 ],\n    \"samples_ts\": [ 117.821, 117.775, 117.759 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:54:47Z\",\n    \"avg_ns\": 3709799123,\n    \"stddev_ns\": 1864185,\n    \"avg_ts\": 34.503222,\n    \"stddev_ts\": 0.017333,\n    \"samples_ns\": [ 3707662251, 3710651199, 3711083920 ],\n    \"samples_ts\": [ 34.5231, 34.4953, 34.4913 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:54:43Z",
+          "avg_ns": 1086727154,
+          "stddev_ns": 299756,
+          "avg_ts": 117.784861,
+          "stddev_ts": 0.032097,
+          "samples_ns": [
+            1086396689,
+            1086816396,
+            1086968379
+          ],
+          "samples_ts": [
+            117.821,
+            117.775,
+            117.759
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:54:47Z",
+          "avg_ns": 3709799123,
+          "stddev_ns": 1864185,
+          "avg_ts": 34.503222,
+          "stddev_ts": 0.017333,
+          "samples_ns": [
+            3707662251,
+            3710651199,
+            3711083920
+          ],
+          "samples_ts": [
+            34.5231,
+            34.4953,
+            34.4913
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 148
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:55:50.883418+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:54:59Z\",\n    \"avg_ns\": 1087401346,\n    \"stddev_ns\": 65150,\n    \"avg_ts\": 117.711828,\n    \"stddev_ts\": 0.007053,\n    \"samples_ns\": [ 1087398729, 1087337543, 1087467766 ],\n    \"samples_ts\": [ 117.712, 117.719, 117.705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:55:04Z\",\n    \"avg_ns\": 15577493417,\n    \"stddev_ns\": 16873210,\n    \"avg_ts\": 32.867958,\n    \"stddev_ts\": 0.035622,\n    \"samples_ns\": [ 15558206088, 15584748685, 15589525478 ],\n    \"samples_ts\": [ 32.9087, 32.8526, 32.8426 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:54:59Z",
+          "avg_ns": 1087401346,
+          "stddev_ns": 65150,
+          "avg_ts": 117.711828,
+          "stddev_ts": 0.007053,
+          "samples_ns": [
+            1087398729,
+            1087337543,
+            1087467766
+          ],
+          "samples_ts": [
+            117.712,
+            117.719,
+            117.705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:55:04Z",
+          "avg_ns": 15577493417,
+          "stddev_ns": 16873210,
+          "avg_ts": 32.867958,
+          "stddev_ts": 0.035622,
+          "samples_ns": [
+            15558206088,
+            15584748685,
+            15589525478
+          ],
+          "samples_ts": [
+            32.9087,
+            32.8526,
+            32.8426
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 149
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:56:21.036788+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:55:51Z\",\n    \"avg_ns\": 4550810653,\n    \"stddev_ns\": 297999,\n    \"avg_ts\": 112.507428,\n    \"stddev_ts\": 0.006980,\n    \"samples_ns\": [ 4551075150, 4550513380, 4550843431 ],\n    \"samples_ts\": [ 112.501, 112.515, 112.507 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:56:09Z\",\n    \"avg_ns\": 3721871362,\n    \"stddev_ns\": 644953,\n    \"avg_ts\": 34.391302,\n    \"stddev_ts\": 0.005959,\n    \"samples_ns\": [ 3722613910, 3721549396, 3721450780 ],\n    \"samples_ts\": [ 34.3844, 34.3943, 34.3952 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:55:51Z",
+          "avg_ns": 4550810653,
+          "stddev_ns": 297999,
+          "avg_ts": 112.507428,
+          "stddev_ts": 0.00698,
+          "samples_ns": [
+            4551075150,
+            4550513380,
+            4550843431
+          ],
+          "samples_ts": [
+            112.501,
+            112.515,
+            112.507
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:56:09Z",
+          "avg_ns": 3721871362,
+          "stddev_ns": 644953,
+          "avg_ts": 34.391302,
+          "stddev_ts": 0.005959,
+          "samples_ns": [
+            3722613910,
+            3721549396,
+            3721450780
+          ],
+          "samples_ts": [
+            34.3844,
+            34.3943,
+            34.3952
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 150
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:57:27.118630+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:56:21Z\",\n    \"avg_ns\": 4556608958,\n    \"stddev_ns\": 203722,\n    \"avg_ts\": 112.364261,\n    \"stddev_ts\": 0.004438,\n    \"samples_ns\": [ 4556410710, 4556654113, 4556762053 ],\n    \"samples_ts\": [ 112.369, 112.363, 112.36 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:56:39Z\",\n    \"avg_ns\": 15688529647,\n    \"stddev_ns\": 10692741,\n    \"avg_ts\": 32.635318,\n    \"stddev_ts\": 0.022249,\n    \"samples_ns\": [ 15676184547, 15694655084, 15694749312 ],\n    \"samples_ts\": [ 32.661, 32.6226, 32.6224 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:56:21Z",
+          "avg_ns": 4556608958,
+          "stddev_ns": 203722,
+          "avg_ts": 112.364261,
+          "stddev_ts": 0.004438,
+          "samples_ns": [
+            4556410710,
+            4556654113,
+            4556762053
+          ],
+          "samples_ts": [
+            112.369,
+            112.363,
+            112.36
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:56:39Z",
+          "avg_ns": 15688529647,
+          "stddev_ns": 10692741,
+          "avg_ts": 32.635318,
+          "stddev_ts": 0.022249,
+          "samples_ns": [
+            15676184547,
+            15694655084,
+            15694749312
+          ],
+          "samples_ts": [
+            32.661,
+            32.6226,
+            32.6224
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 151
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:57:43.411931+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:57:27Z\",\n    \"avg_ns\": 1085563809,\n    \"stddev_ns\": 141007,\n    \"avg_ts\": 117.911080,\n    \"stddev_ts\": 0.014891,\n    \"samples_ns\": [ 1085468753, 1085501698, 1085720977 ],\n    \"samples_ts\": [ 117.921, 117.918, 117.894 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:57:32Z\",\n    \"avg_ns\": 3722088818,\n    \"stddev_ns\": 779869,\n    \"avg_ts\": 34.389293,\n    \"stddev_ts\": 0.007206,\n    \"samples_ns\": [ 3722469967, 3721191674, 3722604813 ],\n    \"samples_ts\": [ 34.3858, 34.3976, 34.3845 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:57:27Z",
+          "avg_ns": 1085563809,
+          "stddev_ns": 141007,
+          "avg_ts": 117.91108,
+          "stddev_ts": 0.014891,
+          "samples_ns": [
+            1085468753,
+            1085501698,
+            1085720977
+          ],
+          "samples_ts": [
+            117.921,
+            117.918,
+            117.894
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:57:32Z",
+          "avg_ns": 3722088818,
+          "stddev_ns": 779869,
+          "avg_ts": 34.389293,
+          "stddev_ts": 0.007206,
+          "samples_ns": [
+            3722469967,
+            3721191674,
+            3722604813
+          ],
+          "samples_ts": [
+            34.3858,
+            34.3976,
+            34.3845
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 152
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:58:34.649490+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:57:44Z\",\n    \"avg_ns\": 1086229119,\n    \"stddev_ns\": 211788,\n    \"avg_ts\": 117.838862,\n    \"stddev_ts\": 0.022696,\n    \"samples_ns\": [ 1086024179, 1086442350, 1086220829 ],\n    \"samples_ts\": [ 117.861, 117.816, 117.84 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:57:48Z\",\n    \"avg_ns\": 15357401400,\n    \"stddev_ns\": 24725491,\n    \"avg_ts\": 33.339031,\n    \"stddev_ts\": 0.053724,\n    \"samples_ns\": [ 15328919138, 15369942611, 15373342453 ],\n    \"samples_ts\": [ 33.4009, 33.3118, 33.3044 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:57:44Z",
+          "avg_ns": 1086229119,
+          "stddev_ns": 211788,
+          "avg_ts": 117.838862,
+          "stddev_ts": 0.022696,
+          "samples_ns": [
+            1086024179,
+            1086442350,
+            1086220829
+          ],
+          "samples_ts": [
+            117.861,
+            117.816,
+            117.84
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:57:48Z",
+          "avg_ns": 15357401400,
+          "stddev_ns": 24725491,
+          "avg_ts": 33.339031,
+          "stddev_ts": 0.053724,
+          "samples_ns": [
+            15328919138,
+            15369942611,
+            15373342453
+          ],
+          "samples_ts": [
+            33.4009,
+            33.3118,
+            33.3044
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 153
+    },
+    {
+      "timestamp_utc": "2025-12-08T21:59:05.793110+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:58:35Z\",\n    \"avg_ns\": 4800113579,\n    \"stddev_ns\": 909470,\n    \"avg_ts\": 106.664145,\n    \"stddev_ts\": 0.020091,\n    \"samples_ns\": [ 4800038029, 4801053162, 4799249548 ],\n    \"samples_ts\": [ 106.666, 106.643, 106.683 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:58:54Z\",\n    \"avg_ns\": 3719016912,\n    \"stddev_ns\": 12770882,\n    \"avg_ts\": 34.417968,\n    \"stddev_ts\": 0.117954,\n    \"samples_ns\": [ 3711290736, 3712002610, 3733757392 ],\n    \"samples_ts\": [ 34.4893, 34.4827, 34.2818 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:58:35Z",
+          "avg_ns": 4800113579,
+          "stddev_ns": 909470,
+          "avg_ts": 106.664145,
+          "stddev_ts": 0.020091,
+          "samples_ns": [
+            4800038029,
+            4801053162,
+            4799249548
+          ],
+          "samples_ts": [
+            106.666,
+            106.643,
+            106.683
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:58:54Z",
+          "avg_ns": 3719016912,
+          "stddev_ns": 12770882,
+          "avg_ts": 34.417968,
+          "stddev_ts": 0.117954,
+          "samples_ns": [
+            3711290736,
+            3712002610,
+            3733757392
+          ],
+          "samples_ts": [
+            34.4893,
+            34.4827,
+            34.2818
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 154
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:00:12.442953+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:59:06Z\",\n    \"avg_ns\": 4865920273,\n    \"stddev_ns\": 845940,\n    \"avg_ts\": 105.221619,\n    \"stddev_ts\": 0.018293,\n    \"samples_ns\": [ 4865060240, 4866751379, 4865949200 ],\n    \"samples_ts\": [ 105.24, 105.204, 105.221 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T21:59:25Z\",\n    \"avg_ns\": 15465409013,\n    \"stddev_ns\": 1925491,\n    \"avg_ts\": 33.106141,\n    \"stddev_ts\": 0.004122,\n    \"samples_ns\": [ 15463983674, 15464643906, 15467599459 ],\n    \"samples_ts\": [ 33.1092, 33.1078, 33.1015 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:59:06Z",
+          "avg_ns": 4865920273,
+          "stddev_ns": 845940,
+          "avg_ts": 105.221619,
+          "stddev_ts": 0.018293,
+          "samples_ns": [
+            4865060240,
+            4866751379,
+            4865949200
+          ],
+          "samples_ts": [
+            105.24,
+            105.204,
+            105.221
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T21:59:25Z",
+          "avg_ns": 15465409013,
+          "stddev_ns": 1925491,
+          "avg_ts": 33.106141,
+          "stddev_ts": 0.004122,
+          "samples_ns": [
+            15463983674,
+            15464643906,
+            15467599459
+          ],
+          "samples_ts": [
+            33.1092,
+            33.1078,
+            33.1015
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 155
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:00:28.901247+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:00:13Z\",\n    \"avg_ns\": 1086034722,\n    \"stddev_ns\": 126017,\n    \"avg_ts\": 117.859953,\n    \"stddev_ts\": 0.012706,\n    \"samples_ns\": [ 1085912160, 1086145422, 1086046586 ],\n    \"samples_ts\": [ 117.873, 117.848, 117.859 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:00:17Z\",\n    \"avg_ns\": 3775703011,\n    \"stddev_ns\": 608919,\n    \"avg_ts\": 33.900972,\n    \"stddev_ts\": 0.005411,\n    \"samples_ns\": [ 3776396045, 3775301625, 3775411365 ],\n    \"samples_ts\": [ 33.8948, 33.9046, 33.9036 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:00:13Z",
+          "avg_ns": 1086034722,
+          "stddev_ns": 126017,
+          "avg_ts": 117.859953,
+          "stddev_ts": 0.012706,
+          "samples_ns": [
+            1085912160,
+            1086145422,
+            1086046586
+          ],
+          "samples_ts": [
+            117.873,
+            117.848,
+            117.859
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:00:17Z",
+          "avg_ns": 3775703011,
+          "stddev_ns": 608919,
+          "avg_ts": 33.900972,
+          "stddev_ts": 0.005411,
+          "samples_ns": [
+            3776396045,
+            3775301625,
+            3775411365
+          ],
+          "samples_ts": [
+            33.8948,
+            33.9046,
+            33.9036
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 156
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:01:21.028642+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:00:29Z\",\n    \"avg_ns\": 1086757337,\n    \"stddev_ns\": 54323,\n    \"avg_ts\": 117.781584,\n    \"stddev_ts\": 0.005887,\n    \"samples_ns\": [ 1086748109, 1086815683, 1086708219 ],\n    \"samples_ts\": [ 117.783, 117.775, 117.787 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:00:33Z\",\n    \"avg_ns\": 15665225284,\n    \"stddev_ns\": 9029921,\n    \"avg_ts\": 32.683865,\n    \"stddev_ts\": 0.018842,\n    \"samples_ns\": [ 15669117580, 15654903835, 15671654439 ],\n    \"samples_ts\": [ 32.6757, 32.7054, 32.6704 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:00:29Z",
+          "avg_ns": 1086757337,
+          "stddev_ns": 54323,
+          "avg_ts": 117.781584,
+          "stddev_ts": 0.005887,
+          "samples_ns": [
+            1086748109,
+            1086815683,
+            1086708219
+          ],
+          "samples_ts": [
+            117.783,
+            117.775,
+            117.787
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:00:33Z",
+          "avg_ns": 15665225284,
+          "stddev_ns": 9029921,
+          "avg_ts": 32.683865,
+          "stddev_ts": 0.018842,
+          "samples_ns": [
+            15669117580,
+            15654903835,
+            15671654439
+          ],
+          "samples_ts": [
+            32.6757,
+            32.7054,
+            32.6704
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 157
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:01:51.371763+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:01:21Z\",\n    \"avg_ns\": 4544836670,\n    \"stddev_ns\": 281526,\n    \"avg_ts\": 112.655314,\n    \"stddev_ts\": 0.006566,\n    \"samples_ns\": [ 4545069386, 4544892214, 4544548412 ],\n    \"samples_ts\": [ 112.65, 112.654, 112.662 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:01:39Z\",\n    \"avg_ns\": 3791579029,\n    \"stddev_ns\": 928484,\n    \"avg_ts\": 33.759023,\n    \"stddev_ts\": 0.008266,\n    \"samples_ns\": [ 3790770043, 3792592816, 3791374228 ],\n    \"samples_ts\": [ 33.7662, 33.75, 33.7608 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:01:21Z",
+          "avg_ns": 4544836670,
+          "stddev_ns": 281526,
+          "avg_ts": 112.655314,
+          "stddev_ts": 0.006566,
+          "samples_ns": [
+            4545069386,
+            4544892214,
+            4544548412
+          ],
+          "samples_ts": [
+            112.65,
+            112.654,
+            112.662
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:01:39Z",
+          "avg_ns": 3791579029,
+          "stddev_ns": 928484,
+          "avg_ts": 33.759023,
+          "stddev_ts": 0.008266,
+          "samples_ns": [
+            3790770043,
+            3792592816,
+            3791374228
+          ],
+          "samples_ts": [
+            33.7662,
+            33.75,
+            33.7608
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 158
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:02:56.730142+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:01:52Z\",\n    \"avg_ns\": 4556566400,\n    \"stddev_ns\": 621507,\n    \"avg_ts\": 112.365312,\n    \"stddev_ts\": 0.015235,\n    \"samples_ns\": [ 4557275379, 4556143162, 4556280660 ],\n    \"samples_ts\": [ 112.348, 112.376, 112.372 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:02:10Z\",\n    \"avg_ns\": 15433290729,\n    \"stddev_ns\": 4334330,\n    \"avg_ts\": 33.175039,\n    \"stddev_ts\": 0.009310,\n    \"samples_ns\": [ 15428545989, 15434295208, 15437030992 ],\n    \"samples_ts\": [ 33.1852, 33.1729, 33.167 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:01:52Z",
+          "avg_ns": 4556566400,
+          "stddev_ns": 621507,
+          "avg_ts": 112.365312,
+          "stddev_ts": 0.015235,
+          "samples_ns": [
+            4557275379,
+            4556143162,
+            4556280660
+          ],
+          "samples_ts": [
+            112.348,
+            112.376,
+            112.372
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:02:10Z",
+          "avg_ns": 15433290729,
+          "stddev_ns": 4334330,
+          "avg_ts": 33.175039,
+          "stddev_ts": 0.00931,
+          "samples_ns": [
+            15428545989,
+            15434295208,
+            15437030992
+          ],
+          "samples_ts": [
+            33.1852,
+            33.1729,
+            33.167
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 159
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:03:13.276718+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:02:57Z\",\n    \"avg_ns\": 1086962380,\n    \"stddev_ns\": 145609,\n    \"avg_ts\": 117.759367,\n    \"stddev_ts\": 0.015775,\n    \"samples_ns\": [ 1086972177, 1087102844, 1086812119 ],\n    \"samples_ts\": [ 117.758, 117.744, 117.776 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:03:01Z\",\n    \"avg_ns\": 3789403432,\n    \"stddev_ns\": 1503342,\n    \"avg_ts\": 33.778408,\n    \"stddev_ts\": 0.013400,\n    \"samples_ns\": [ 3790973905, 3789258702, 3787977689 ],\n    \"samples_ts\": [ 33.7644, 33.7797, 33.7911 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:02:57Z",
+          "avg_ns": 1086962380,
+          "stddev_ns": 145609,
+          "avg_ts": 117.759367,
+          "stddev_ts": 0.015775,
+          "samples_ns": [
+            1086972177,
+            1087102844,
+            1086812119
+          ],
+          "samples_ts": [
+            117.758,
+            117.744,
+            117.776
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:03:01Z",
+          "avg_ns": 3789403432,
+          "stddev_ns": 1503342,
+          "avg_ts": 33.778408,
+          "stddev_ts": 0.0134,
+          "samples_ns": [
+            3790973905,
+            3789258702,
+            3787977689
+          ],
+          "samples_ts": [
+            33.7644,
+            33.7797,
+            33.7911
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 160
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:04:05.065485+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:03:13Z\",\n    \"avg_ns\": 1087377770,\n    \"stddev_ns\": 174423,\n    \"avg_ts\": 117.714382,\n    \"stddev_ts\": 0.018881,\n    \"samples_ns\": [ 1087306140, 1087576605, 1087250565 ],\n    \"samples_ts\": [ 117.722, 117.693, 117.728 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:03:18Z\",\n    \"avg_ns\": 15551291299,\n    \"stddev_ns\": 47305625,\n    \"avg_ts\": 32.923514,\n    \"stddev_ts\": 0.099976,\n    \"samples_ns\": [ 15605872258, 15522126948, 15525874691 ],\n    \"samples_ts\": [ 32.8082, 32.9852, 32.9772 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:03:13Z",
+          "avg_ns": 1087377770,
+          "stddev_ns": 174423,
+          "avg_ts": 117.714382,
+          "stddev_ts": 0.018881,
+          "samples_ns": [
+            1087306140,
+            1087576605,
+            1087250565
+          ],
+          "samples_ts": [
+            117.722,
+            117.693,
+            117.728
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:03:18Z",
+          "avg_ns": 15551291299,
+          "stddev_ns": 47305625,
+          "avg_ts": 32.923514,
+          "stddev_ts": 0.099976,
+          "samples_ns": [
+            15605872258,
+            15522126948,
+            15525874691
+          ],
+          "samples_ts": [
+            32.8082,
+            32.9852,
+            32.9772
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 161
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:04:35.443318+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:04:05Z\",\n    \"avg_ns\": 4576227433,\n    \"stddev_ns\": 692770,\n    \"avg_ts\": 111.882553,\n    \"stddev_ts\": 0.016855,\n    \"samples_ns\": [ 4576014496, 4576998242, 4575669562 ],\n    \"samples_ts\": [ 111.888, 111.864, 111.896 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:04:24Z\",\n    \"avg_ns\": 3761353711,\n    \"stddev_ns\": 651002,\n    \"avg_ts\": 34.030302,\n    \"stddev_ts\": 0.005890,\n    \"samples_ns\": [ 3761960292, 3760665909, 3761434932 ],\n    \"samples_ts\": [ 34.0248, 34.0365, 34.0296 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:04:05Z",
+          "avg_ns": 4576227433,
+          "stddev_ns": 692770,
+          "avg_ts": 111.882553,
+          "stddev_ts": 0.016855,
+          "samples_ns": [
+            4576014496,
+            4576998242,
+            4575669562
+          ],
+          "samples_ts": [
+            111.888,
+            111.864,
+            111.896
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:04:24Z",
+          "avg_ns": 3761353711,
+          "stddev_ns": 651002,
+          "avg_ts": 34.030302,
+          "stddev_ts": 0.00589,
+          "samples_ns": [
+            3761960292,
+            3760665909,
+            3761434932
+          ],
+          "samples_ts": [
+            34.0248,
+            34.0365,
+            34.0296
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 162
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:05:41.294043+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:04:36Z\",\n    \"avg_ns\": 4557751611,\n    \"stddev_ns\": 520683,\n    \"avg_ts\": 112.336092,\n    \"stddev_ts\": 0.012616,\n    \"samples_ns\": [ 4558146321, 4557173273, 4557935241 ],\n    \"samples_ts\": [ 112.326, 112.35, 112.332 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:04:54Z\",\n    \"avg_ns\": 15610145377,\n    \"stddev_ns\": 2133151,\n    \"avg_ts\": 32.799182,\n    \"stddev_ts\": 0.004467,\n    \"samples_ns\": [ 15608124501, 15612362530, 15609949102 ],\n    \"samples_ts\": [ 32.8034, 32.7945, 32.7996 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:04:36Z",
+          "avg_ns": 4557751611,
+          "stddev_ns": 520683,
+          "avg_ts": 112.336092,
+          "stddev_ts": 0.012616,
+          "samples_ns": [
+            4558146321,
+            4557173273,
+            4557935241
+          ],
+          "samples_ts": [
+            112.326,
+            112.35,
+            112.332
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:04:54Z",
+          "avg_ns": 15610145377,
+          "stddev_ns": 2133151,
+          "avg_ts": 32.799182,
+          "stddev_ts": 0.004467,
+          "samples_ns": [
+            15608124501,
+            15612362530,
+            15609949102
+          ],
+          "samples_ts": [
+            32.8034,
+            32.7945,
+            32.7996
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 163
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:05:57.712334+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:05:41Z\",\n    \"avg_ns\": 1086645871,\n    \"stddev_ns\": 120829,\n    \"avg_ts\": 117.793666,\n    \"stddev_ts\": 0.012084,\n    \"samples_ns\": [ 1086547404, 1086623310, 1086766901 ],\n    \"samples_ts\": [ 117.804, 117.796, 117.781 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:05:46Z\",\n    \"avg_ns\": 3756777430,\n    \"stddev_ns\": 178112,\n    \"avg_ts\": 34.071755,\n    \"stddev_ts\": 0.001517,\n    \"samples_ns\": [ 3756612903, 3756947247, 3756772141 ],\n    \"samples_ts\": [ 34.0732, 34.0702, 34.0718 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:05:41Z",
+          "avg_ns": 1086645871,
+          "stddev_ns": 120829,
+          "avg_ts": 117.793666,
+          "stddev_ts": 0.012084,
+          "samples_ns": [
+            1086547404,
+            1086623310,
+            1086766901
+          ],
+          "samples_ts": [
+            117.804,
+            117.796,
+            117.781
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:05:46Z",
+          "avg_ns": 3756777430,
+          "stddev_ns": 178112,
+          "avg_ts": 34.071755,
+          "stddev_ts": 0.001517,
+          "samples_ns": [
+            3756612903,
+            3756947247,
+            3756772141
+          ],
+          "samples_ts": [
+            34.0732,
+            34.0702,
+            34.0718
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 164
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:06:49.403820+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:05:58Z\",\n    \"avg_ns\": 1085964870,\n    \"stddev_ns\": 139601,\n    \"avg_ts\": 117.867534,\n    \"stddev_ts\": 0.015151,\n    \"samples_ns\": [ 1086125158, 1085899542, 1085869910 ],\n    \"samples_ts\": [ 117.85, 117.875, 117.878 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:06:02Z\",\n    \"avg_ns\": 15521235944,\n    \"stddev_ns\": 32806057,\n    \"avg_ts\": 32.987162,\n    \"stddev_ts\": 0.069638,\n    \"samples_ns\": [ 15559061740, 15500554221, 15504091872 ],\n    \"samples_ts\": [ 32.9069, 33.0311, 33.0235 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:05:58Z",
+          "avg_ns": 1085964870,
+          "stddev_ns": 139601,
+          "avg_ts": 117.867534,
+          "stddev_ts": 0.015151,
+          "samples_ns": [
+            1086125158,
+            1085899542,
+            1085869910
+          ],
+          "samples_ts": [
+            117.85,
+            117.875,
+            117.878
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:06:02Z",
+          "avg_ns": 15521235944,
+          "stddev_ns": 32806057,
+          "avg_ts": 32.987162,
+          "stddev_ts": 0.069638,
+          "samples_ns": [
+            15559061740,
+            15500554221,
+            15504091872
+          ],
+          "samples_ts": [
+            32.9069,
+            33.0311,
+            33.0235
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 165
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:07:20.969587+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:06:50Z\",\n    \"avg_ns\": 4846784729,\n    \"stddev_ns\": 1007852,\n    \"avg_ts\": 105.637045,\n    \"stddev_ts\": 0.021964,\n    \"samples_ns\": [ 4846032297, 4847929808, 4846392082 ],\n    \"samples_ts\": [ 105.653, 105.612, 105.646 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:07:09Z\",\n    \"avg_ns\": 3789288872,\n    \"stddev_ns\": 1155087,\n    \"avg_ts\": 33.779427,\n    \"stddev_ts\": 0.010281,\n    \"samples_ns\": [ 3790617875, 3788700218, 3788548524 ],\n    \"samples_ts\": [ 33.7676, 33.7847, 33.786 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:06:50Z",
+          "avg_ns": 4846784729,
+          "stddev_ns": 1007852,
+          "avg_ts": 105.637045,
+          "stddev_ts": 0.021964,
+          "samples_ns": [
+            4846032297,
+            4847929808,
+            4846392082
+          ],
+          "samples_ts": [
+            105.653,
+            105.612,
+            105.646
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:07:09Z",
+          "avg_ns": 3789288872,
+          "stddev_ns": 1155087,
+          "avg_ts": 33.779427,
+          "stddev_ts": 0.010281,
+          "samples_ns": [
+            3790617875,
+            3788700218,
+            3788548524
+          ],
+          "samples_ts": [
+            33.7676,
+            33.7847,
+            33.786
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 166
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:08:27.739893+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:07:21Z\",\n    \"avg_ns\": 4861001084,\n    \"stddev_ns\": 807554,\n    \"avg_ts\": 105.328100,\n    \"stddev_ts\": 0.017366,\n    \"samples_ns\": [ 4860669141, 4861915242, 4860418871 ],\n    \"samples_ts\": [ 105.335, 105.308, 105.341 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:07:41Z\",\n    \"avg_ns\": 15510637476,\n    \"stddev_ns\": 9880592,\n    \"avg_ts\": 33.009613,\n    \"stddev_ts\": 0.021022,\n    \"samples_ns\": [ 15521768066, 15502902255, 15507242107 ],\n    \"samples_ts\": [ 32.9859, 33.0261, 33.0168 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:07:21Z",
+          "avg_ns": 4861001084,
+          "stddev_ns": 807554,
+          "avg_ts": 105.3281,
+          "stddev_ts": 0.017366,
+          "samples_ns": [
+            4860669141,
+            4861915242,
+            4860418871
+          ],
+          "samples_ts": [
+            105.335,
+            105.308,
+            105.341
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:07:41Z",
+          "avg_ns": 15510637476,
+          "stddev_ns": 9880592,
+          "avg_ts": 33.009613,
+          "stddev_ts": 0.021022,
+          "samples_ns": [
+            15521768066,
+            15502902255,
+            15507242107
+          ],
+          "samples_ts": [
+            32.9859,
+            33.0261,
+            33.0168
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 167
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:08:44.260930+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:08:28Z\",\n    \"avg_ns\": 1086070906,\n    \"stddev_ns\": 114432,\n    \"avg_ts\": 117.856026,\n    \"stddev_ts\": 0.011342,\n    \"samples_ns\": [ 1086159730, 1085955745, 1086097245 ],\n    \"samples_ts\": [ 117.846, 117.869, 117.853 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:08:32Z\",\n    \"avg_ns\": 3782075844,\n    \"stddev_ns\": 385052,\n    \"avg_ts\": 33.843848,\n    \"stddev_ts\": 0.003357,\n    \"samples_ns\": [ 3782393499, 3782172013, 3781662022 ],\n    \"samples_ts\": [ 33.841, 33.843, 33.8476 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:08:28Z",
+          "avg_ns": 1086070906,
+          "stddev_ns": 114432,
+          "avg_ts": 117.856026,
+          "stddev_ts": 0.011342,
+          "samples_ns": [
+            1086159730,
+            1085955745,
+            1086097245
+          ],
+          "samples_ts": [
+            117.846,
+            117.869,
+            117.853
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:08:32Z",
+          "avg_ns": 3782075844,
+          "stddev_ns": 385052,
+          "avg_ts": 33.843848,
+          "stddev_ts": 0.003357,
+          "samples_ns": [
+            3782393499,
+            3782172013,
+            3781662022
+          ],
+          "samples_ts": [
+            33.841,
+            33.843,
+            33.8476
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 168
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:09:36.158039+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:08:44Z\",\n    \"avg_ns\": 1085917938,\n    \"stddev_ns\": 91951,\n    \"avg_ts\": 117.872627,\n    \"stddev_ts\": 0.008604,\n    \"samples_ns\": [ 1085831421, 1085987069, 1085935326 ],\n    \"samples_ts\": [ 117.882, 117.865, 117.871 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:08:49Z\",\n    \"avg_ns\": 15589422494,\n    \"stddev_ns\": 2408208,\n    \"avg_ts\": 32.842782,\n    \"stddev_ts\": 0.005059,\n    \"samples_ns\": [ 15588546667, 15587581597, 15592139220 ],\n    \"samples_ts\": [ 32.8446, 32.8467, 32.8371 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:08:44Z",
+          "avg_ns": 1085917938,
+          "stddev_ns": 91951,
+          "avg_ts": 117.872627,
+          "stddev_ts": 0.008604,
+          "samples_ns": [
+            1085831421,
+            1085987069,
+            1085935326
+          ],
+          "samples_ts": [
+            117.882,
+            117.865,
+            117.871
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:08:49Z",
+          "avg_ns": 15589422494,
+          "stddev_ns": 2408208,
+          "avg_ts": 32.842782,
+          "stddev_ts": 0.005059,
+          "samples_ns": [
+            15588546667,
+            15587581597,
+            15592139220
+          ],
+          "samples_ts": [
+            32.8446,
+            32.8467,
+            32.8371
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 169
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:10:06.350985+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:09:36Z\",\n    \"avg_ns\": 4541353810,\n    \"stddev_ns\": 285971,\n    \"avg_ts\": 112.741712,\n    \"stddev_ts\": 0.006899,\n    \"samples_ns\": [ 4541674514, 4541203463, 4541183454 ],\n    \"samples_ts\": [ 112.734, 112.745, 112.746 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:09:55Z\",\n    \"avg_ns\": 3745404282,\n    \"stddev_ns\": 1245621,\n    \"avg_ts\": 34.175218,\n    \"stddev_ts\": 0.011354,\n    \"samples_ns\": [ 3746313242, 3743986386, 3745913219 ],\n    \"samples_ts\": [ 34.1669, 34.1882, 34.1706 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:09:36Z",
+          "avg_ns": 4541353810,
+          "stddev_ns": 285971,
+          "avg_ts": 112.741712,
+          "stddev_ts": 0.006899,
+          "samples_ns": [
+            4541674514,
+            4541203463,
+            4541183454
+          ],
+          "samples_ts": [
+            112.734,
+            112.745,
+            112.746
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:09:55Z",
+          "avg_ns": 3745404282,
+          "stddev_ns": 1245621,
+          "avg_ts": 34.175218,
+          "stddev_ts": 0.011354,
+          "samples_ns": [
+            3746313242,
+            3743986386,
+            3745913219
+          ],
+          "samples_ts": [
+            34.1669,
+            34.1882,
+            34.1706
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 170
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:11:11.920690+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:10:07Z\",\n    \"avg_ns\": 4548851732,\n    \"stddev_ns\": 231579,\n    \"avg_ts\": 112.555878,\n    \"stddev_ts\": 0.005222,\n    \"samples_ns\": [ 4548988681, 4548957801, 4548608716 ],\n    \"samples_ts\": [ 112.552, 112.553, 112.562 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:10:25Z\",\n    \"avg_ns\": 15526173884,\n    \"stddev_ns\": 48233727,\n    \"avg_ts\": 32.976784,\n    \"stddev_ts\": 0.102265,\n    \"samples_ns\": [ 15581800102, 15495955487, 15500766063 ],\n    \"samples_ts\": [ 32.8588, 33.0409, 33.0306 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:10:07Z",
+          "avg_ns": 4548851732,
+          "stddev_ns": 231579,
+          "avg_ts": 112.555878,
+          "stddev_ts": 0.005222,
+          "samples_ns": [
+            4548988681,
+            4548957801,
+            4548608716
+          ],
+          "samples_ts": [
+            112.552,
+            112.553,
+            112.562
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:10:25Z",
+          "avg_ns": 15526173884,
+          "stddev_ns": 48233727,
+          "avg_ts": 32.976784,
+          "stddev_ts": 0.102265,
+          "samples_ns": [
+            15581800102,
+            15495955487,
+            15500766063
+          ],
+          "samples_ts": [
+            32.8588,
+            33.0409,
+            33.0306
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 171
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:11:28.303294+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:11:12Z\",\n    \"avg_ns\": 1087368413,\n    \"stddev_ns\": 178342,\n    \"avg_ts\": 117.715395,\n    \"stddev_ts\": 0.018633,\n    \"samples_ns\": [ 1087257574, 1087280944, 1087566723 ],\n    \"samples_ts\": [ 117.727, 117.725, 117.694 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:11:16Z\",\n    \"avg_ns\": 3740434743,\n    \"stddev_ns\": 776680,\n    \"avg_ts\": 34.220622,\n    \"stddev_ts\": 0.007083,\n    \"samples_ns\": [ 3741313539, 3739852935, 3740137756 ],\n    \"samples_ts\": [ 34.2126, 34.2259, 34.2233 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:11:12Z",
+          "avg_ns": 1087368413,
+          "stddev_ns": 178342,
+          "avg_ts": 117.715395,
+          "stddev_ts": 0.018633,
+          "samples_ns": [
+            1087257574,
+            1087280944,
+            1087566723
+          ],
+          "samples_ts": [
+            117.727,
+            117.725,
+            117.694
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:11:16Z",
+          "avg_ns": 3740434743,
+          "stddev_ns": 776680,
+          "avg_ts": 34.220622,
+          "stddev_ts": 0.007083,
+          "samples_ns": [
+            3741313539,
+            3739852935,
+            3740137756
+          ],
+          "samples_ts": [
+            34.2126,
+            34.2259,
+            34.2233
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 172
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:12:19.777660+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:11:28Z\",\n    \"avg_ns\": 1085952577,\n    \"stddev_ns\": 264847,\n    \"avg_ts\": 117.868872,\n    \"stddev_ts\": 0.028301,\n    \"samples_ns\": [ 1086046709, 1086153154, 1085657870 ],\n    \"samples_ts\": [ 117.859, 117.847, 117.901 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:11:33Z\",\n    \"avg_ns\": 15446734086,\n    \"stddev_ns\": 25782842,\n    \"avg_ts\": 33.146227,\n    \"stddev_ts\": 0.055273,\n    \"samples_ns\": [ 15476505540, 15431829436, 15431867282 ],\n    \"samples_ts\": [ 33.0824, 33.1782, 33.1781 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:11:28Z",
+          "avg_ns": 1085952577,
+          "stddev_ns": 264847,
+          "avg_ts": 117.868872,
+          "stddev_ts": 0.028301,
+          "samples_ns": [
+            1086046709,
+            1086153154,
+            1085657870
+          ],
+          "samples_ts": [
+            117.859,
+            117.847,
+            117.901
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:11:33Z",
+          "avg_ns": 15446734086,
+          "stddev_ns": 25782842,
+          "avg_ts": 33.146227,
+          "stddev_ts": 0.055273,
+          "samples_ns": [
+            15476505540,
+            15431829436,
+            15431867282
+          ],
+          "samples_ts": [
+            33.0824,
+            33.1782,
+            33.1781
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 173
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:12:50.072846+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:12:20Z\",\n    \"avg_ns\": 4560014372,\n    \"stddev_ns\": 157894,\n    \"avg_ts\": 112.280348,\n    \"stddev_ts\": 0.003888,\n    \"samples_ns\": [ 4559833205, 4560122688, 4560087223 ],\n    \"samples_ts\": [ 112.285, 112.278, 112.279 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:12:38Z\",\n    \"avg_ns\": 3747143112,\n    \"stddev_ns\": 1212751,\n    \"avg_ts\": 34.159360,\n    \"stddev_ts\": 0.011039,\n    \"samples_ns\": [ 3748540657, 3746490940, 3746397740 ],\n    \"samples_ts\": [ 34.1466, 34.1653, 34.1662 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:12:20Z",
+          "avg_ns": 4560014372,
+          "stddev_ns": 157894,
+          "avg_ts": 112.280348,
+          "stddev_ts": 0.003888,
+          "samples_ns": [
+            4559833205,
+            4560122688,
+            4560087223
+          ],
+          "samples_ts": [
+            112.285,
+            112.278,
+            112.279
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:12:38Z",
+          "avg_ns": 3747143112,
+          "stddev_ns": 1212751,
+          "avg_ts": 34.15936,
+          "stddev_ts": 0.011039,
+          "samples_ns": [
+            3748540657,
+            3746490940,
+            3746397740
+          ],
+          "samples_ts": [
+            34.1466,
+            34.1653,
+            34.1662
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 174
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:13:55.490183+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:12:50Z\",\n    \"avg_ns\": 4556337712,\n    \"stddev_ns\": 495038,\n    \"avg_ts\": 112.370951,\n    \"stddev_ts\": 0.012094,\n    \"samples_ns\": [ 4555980259, 4556896804, 4556136074 ],\n    \"samples_ts\": [ 112.38, 112.357, 112.376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:13:08Z\",\n    \"avg_ns\": 15467120258,\n    \"stddev_ns\": 1526763,\n    \"avg_ts\": 33.102478,\n    \"stddev_ts\": 0.003246,\n    \"samples_ns\": [ 15467877568, 15465374148, 15468109060 ],\n    \"samples_ts\": [ 33.1009, 33.1062, 33.1004 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:12:50Z",
+          "avg_ns": 4556337712,
+          "stddev_ns": 495038,
+          "avg_ts": 112.370951,
+          "stddev_ts": 0.012094,
+          "samples_ns": [
+            4555980259,
+            4556896804,
+            4556136074
+          ],
+          "samples_ts": [
+            112.38,
+            112.357,
+            112.376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:13:08Z",
+          "avg_ns": 15467120258,
+          "stddev_ns": 1526763,
+          "avg_ts": 33.102478,
+          "stddev_ts": 0.003246,
+          "samples_ns": [
+            15467877568,
+            15465374148,
+            15468109060
+          ],
+          "samples_ts": [
+            33.1009,
+            33.1062,
+            33.1004
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 175
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:14:11.804888+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:13:56Z\",\n    \"avg_ns\": 1086289378,\n    \"stddev_ns\": 205821,\n    \"avg_ts\": 117.832325,\n    \"stddev_ts\": 0.022324,\n    \"samples_ns\": [ 1086216909, 1086129593, 1086521632 ],\n    \"samples_ts\": [ 117.84, 117.85, 117.807 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:14:00Z\",\n    \"avg_ns\": 3722154045,\n    \"stddev_ns\": 574037,\n    \"avg_ts\": 34.388690,\n    \"stddev_ts\": 0.005243,\n    \"samples_ns\": [ 3722679457, 3721552176, 3722230504 ],\n    \"samples_ts\": [ 34.3838, 34.3943, 34.388 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:13:56Z",
+          "avg_ns": 1086289378,
+          "stddev_ns": 205821,
+          "avg_ts": 117.832325,
+          "stddev_ts": 0.022324,
+          "samples_ns": [
+            1086216909,
+            1086129593,
+            1086521632
+          ],
+          "samples_ts": [
+            117.84,
+            117.85,
+            117.807
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:14:00Z",
+          "avg_ns": 3722154045,
+          "stddev_ns": 574037,
+          "avg_ts": 34.38869,
+          "stddev_ts": 0.005243,
+          "samples_ns": [
+            3722679457,
+            3721552176,
+            3722230504
+          ],
+          "samples_ts": [
+            34.3838,
+            34.3943,
+            34.388
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 176
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:15:03.121800+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:14:12Z\",\n    \"avg_ns\": 1086161961,\n    \"stddev_ns\": 66462,\n    \"avg_ts\": 117.846145,\n    \"stddev_ts\": 0.007211,\n    \"samples_ns\": [ 1086085764, 1086207986, 1086192133 ],\n    \"samples_ts\": [ 117.854, 117.841, 117.843 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:14:16Z\",\n    \"avg_ns\": 15392953161,\n    \"stddev_ns\": 75449494,\n    \"avg_ts\": 33.262505,\n    \"stddev_ts\": 0.162583,\n    \"samples_ns\": [ 15479966685, 15345694009, 15353198791 ],\n    \"samples_ts\": [ 33.075, 33.3644, 33.3481 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:14:12Z",
+          "avg_ns": 1086161961,
+          "stddev_ns": 66462,
+          "avg_ts": 117.846145,
+          "stddev_ts": 0.007211,
+          "samples_ns": [
+            1086085764,
+            1086207986,
+            1086192133
+          ],
+          "samples_ts": [
+            117.854,
+            117.841,
+            117.843
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:14:16Z",
+          "avg_ns": 15392953161,
+          "stddev_ns": 75449494,
+          "avg_ts": 33.262505,
+          "stddev_ts": 0.162583,
+          "samples_ns": [
+            15479966685,
+            15345694009,
+            15353198791
+          ],
+          "samples_ts": [
+            33.075,
+            33.3644,
+            33.3481
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 177
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:15:34.701156+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:15:03Z\",\n    \"avg_ns\": 4854099567,\n    \"stddev_ns\": 561864,\n    \"avg_ts\": 105.477854,\n    \"stddev_ts\": 0.012209,\n    \"samples_ns\": [ 4854629033, 4853510117, 4854159551 ],\n    \"samples_ts\": [ 105.466, 105.491, 105.477 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:15:23Z\",\n    \"avg_ns\": 3790626411,\n    \"stddev_ns\": 508255,\n    \"avg_ts\": 33.767506,\n    \"stddev_ts\": 0.004528,\n    \"samples_ns\": [ 3791117024, 3790660028, 3790102181 ],\n    \"samples_ts\": [ 33.7631, 33.7672, 33.7722 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:15:03Z",
+          "avg_ns": 4854099567,
+          "stddev_ns": 561864,
+          "avg_ts": 105.477854,
+          "stddev_ts": 0.012209,
+          "samples_ns": [
+            4854629033,
+            4853510117,
+            4854159551
+          ],
+          "samples_ts": [
+            105.466,
+            105.491,
+            105.477
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:15:23Z",
+          "avg_ns": 3790626411,
+          "stddev_ns": 508255,
+          "avg_ts": 33.767506,
+          "stddev_ts": 0.004528,
+          "samples_ns": [
+            3791117024,
+            3790660028,
+            3790102181
+          ],
+          "samples_ts": [
+            33.7631,
+            33.7672,
+            33.7722
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 178
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:16:40.855672+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:15:35Z\",\n    \"avg_ns\": 4832811590,\n    \"stddev_ns\": 989437,\n    \"avg_ts\": 105.942474,\n    \"stddev_ts\": 0.021585,\n    \"samples_ns\": [ 4831679232, 4833290342, 4833465198 ],\n    \"samples_ts\": [ 105.967, 105.932, 105.928 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:15:54Z\",\n    \"avg_ns\": 15342098538,\n    \"stddev_ns\": 20081467,\n    \"avg_ts\": 33.372265,\n    \"stddev_ts\": 0.043648,\n    \"samples_ns\": [ 15365285945, 15330414713, 15330594957 ],\n    \"samples_ts\": [ 33.3219, 33.3977, 33.3973 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:15:35Z",
+          "avg_ns": 4832811590,
+          "stddev_ns": 989437,
+          "avg_ts": 105.942474,
+          "stddev_ts": 0.021585,
+          "samples_ns": [
+            4831679232,
+            4833290342,
+            4833465198
+          ],
+          "samples_ts": [
+            105.967,
+            105.932,
+            105.928
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:15:54Z",
+          "avg_ns": 15342098538,
+          "stddev_ns": 20081467,
+          "avg_ts": 33.372265,
+          "stddev_ts": 0.043648,
+          "samples_ns": [
+            15365285945,
+            15330414713,
+            15330594957
+          ],
+          "samples_ts": [
+            33.3219,
+            33.3977,
+            33.3973
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 179
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:16:53.836842+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:16:41Z\",\n    \"avg_ns\": 564252829,\n    \"stddev_ns\": 582139,\n    \"avg_ts\": 226.848824,\n    \"stddev_ts\": 0.233789,\n    \"samples_ns\": [ 564600580, 564576006, 563581903 ],\n    \"samples_ts\": [ 226.709, 226.719, 227.119 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:16:43Z\",\n    \"avg_ns\": 3313411789,\n    \"stddev_ns\": 809508,\n    \"avg_ts\": 38.630878,\n    \"stddev_ts\": 0.009390,\n    \"samples_ns\": [ 3314240279, 3312631647, 3313363443 ],\n    \"samples_ts\": [ 38.6212, 38.64, 38.6314 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:16:41Z",
+          "avg_ns": 564252829,
+          "stddev_ns": 582139,
+          "avg_ts": 226.848824,
+          "stddev_ts": 0.233789,
+          "samples_ns": [
+            564600580,
+            564576006,
+            563581903
+          ],
+          "samples_ts": [
+            226.709,
+            226.719,
+            227.119
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:16:43Z",
+          "avg_ns": 3313411789,
+          "stddev_ns": 809508,
+          "avg_ts": 38.630878,
+          "stddev_ts": 0.00939,
+          "samples_ns": [
+            3314240279,
+            3312631647,
+            3313363443
+          ],
+          "samples_ts": [
+            38.6212,
+            38.64,
+            38.6314
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 180
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:17:37.465537+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:16:54Z\",\n    \"avg_ns\": 564243992,\n    \"stddev_ns\": 561868,\n    \"avg_ts\": 226.852366,\n    \"stddev_ts\": 0.225823,\n    \"samples_ns\": [ 564604447, 564530334, 563597196 ],\n    \"samples_ts\": [ 226.707, 226.737, 227.113 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:16:56Z\",\n    \"avg_ns\": 13528502932,\n    \"stddev_ns\": 10138243,\n    \"avg_ts\": 37.846035,\n    \"stddev_ts\": 0.028350,\n    \"samples_ns\": [ 13540209181, 13522567395, 13522732220 ],\n    \"samples_ts\": [ 37.8133, 37.8626, 37.8622 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:16:54Z",
+          "avg_ns": 564243992,
+          "stddev_ns": 561868,
+          "avg_ts": 226.852366,
+          "stddev_ts": 0.225823,
+          "samples_ns": [
+            564604447,
+            564530334,
+            563597196
+          ],
+          "samples_ts": [
+            226.707,
+            226.737,
+            227.113
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:16:56Z",
+          "avg_ns": 13528502932,
+          "stddev_ns": 10138243,
+          "avg_ts": 37.846035,
+          "stddev_ts": 0.02835,
+          "samples_ns": [
+            13540209181,
+            13522567395,
+            13522732220
+          ],
+          "samples_ts": [
+            37.8133,
+            37.8626,
+            37.8622
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 181
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:17:57.550597+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:17:38Z\",\n    \"avg_ns\": 2341389406,\n    \"stddev_ns\": 98318,\n    \"avg_ts\": 218.673579,\n    \"stddev_ts\": 0.009182,\n    \"samples_ns\": [ 2341323026, 2341502357, 2341342835 ],\n    \"samples_ts\": [ 218.68, 218.663, 218.678 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:17:47Z\",\n    \"avg_ns\": 3310050233,\n    \"stddev_ns\": 343173,\n    \"avg_ts\": 38.670108,\n    \"stddev_ts\": 0.003895,\n    \"samples_ns\": [ 3309669711, 3310290989, 3310190001 ],\n    \"samples_ts\": [ 38.6746, 38.6673, 38.6685 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:17:38Z",
+          "avg_ns": 2341389406,
+          "stddev_ns": 98318,
+          "avg_ts": 218.673579,
+          "stddev_ts": 0.009182,
+          "samples_ns": [
+            2341323026,
+            2341502357,
+            2341342835
+          ],
+          "samples_ts": [
+            218.68,
+            218.663,
+            218.678
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:17:47Z",
+          "avg_ns": 3310050233,
+          "stddev_ns": 343173,
+          "avg_ts": 38.670108,
+          "stddev_ts": 0.003895,
+          "samples_ns": [
+            3309669711,
+            3310290989,
+            3310190001
+          ],
+          "samples_ts": [
+            38.6746,
+            38.6673,
+            38.6685
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 182
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:18:48.525707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:17:58Z\",\n    \"avg_ns\": 2357303589,\n    \"stddev_ns\": 291635,\n    \"avg_ts\": 217.197313,\n    \"stddev_ts\": 0.026869,\n    \"samples_ns\": [ 2357068664, 2357629999, 2357212104 ],\n    \"samples_ts\": [ 217.219, 217.167, 217.206 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:18:07Z\",\n    \"avg_ns\": 13585674742,\n    \"stddev_ns\": 13065770,\n    \"avg_ts\": 37.686779,\n    \"stddev_ts\": 0.036230,\n    \"samples_ns\": [ 13575081075, 13581668618, 13600274533 ],\n    \"samples_ts\": [ 37.7162, 37.6979, 37.6463 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:17:58Z",
+          "avg_ns": 2357303589,
+          "stddev_ns": 291635,
+          "avg_ts": 217.197313,
+          "stddev_ts": 0.026869,
+          "samples_ns": [
+            2357068664,
+            2357629999,
+            2357212104
+          ],
+          "samples_ts": [
+            217.219,
+            217.167,
+            217.206
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:18:07Z",
+          "avg_ns": 13585674742,
+          "stddev_ns": 13065770,
+          "avg_ts": 37.686779,
+          "stddev_ts": 0.03623,
+          "samples_ns": [
+            13575081075,
+            13581668618,
+            13600274533
+          ],
+          "samples_ts": [
+            37.7162,
+            37.6979,
+            37.6463
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 183
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:19:01.489164+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:18:49Z\",\n    \"avg_ns\": 564215593,\n    \"stddev_ns\": 133766,\n    \"avg_ts\": 226.863642,\n    \"stddev_ts\": 0.052067,\n    \"samples_ns\": [ 564255668, 564070814, 564320299 ],\n    \"samples_ts\": [ 226.848, 226.922, 226.822 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:18:51Z\",\n    \"avg_ns\": 3306323718,\n    \"stddev_ns\": 988528,\n    \"avg_ts\": 38.713695,\n    \"stddev_ts\": 0.011533,\n    \"samples_ns\": [ 3305748552, 3305761324, 3307461280 ],\n    \"samples_ts\": [ 38.7204, 38.7203, 38.7004 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:18:49Z",
+          "avg_ns": 564215593,
+          "stddev_ns": 133766,
+          "avg_ts": 226.863642,
+          "stddev_ts": 0.052067,
+          "samples_ns": [
+            564255668,
+            564070814,
+            564320299
+          ],
+          "samples_ts": [
+            226.848,
+            226.922,
+            226.822
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:18:51Z",
+          "avg_ns": 3306323718,
+          "stddev_ns": 988528,
+          "avg_ts": 38.713695,
+          "stddev_ts": 0.011533,
+          "samples_ns": [
+            3305748552,
+            3305761324,
+            3307461280
+          ],
+          "samples_ts": [
+            38.7204,
+            38.7203,
+            38.7004
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 184
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:19:45.052054+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:19:02Z\",\n    \"avg_ns\": 563701221,\n    \"stddev_ns\": 125758,\n    \"avg_ts\": 227.070652,\n    \"stddev_ts\": 0.048813,\n    \"samples_ns\": [ 563636937, 563625715, 563841013 ],\n    \"samples_ts\": [ 227.097, 227.101, 227.014 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:19:04Z\",\n    \"avg_ns\": 13505800222,\n    \"stddev_ns\": 1516481,\n    \"avg_ts\": 37.909639,\n    \"stddev_ts\": 0.004231,\n    \"samples_ns\": [ 13507517405, 13505188951, 13504694312 ],\n    \"samples_ts\": [ 37.9048, 37.9114, 37.9127 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:19:02Z",
+          "avg_ns": 563701221,
+          "stddev_ns": 125758,
+          "avg_ts": 227.070652,
+          "stddev_ts": 0.048813,
+          "samples_ns": [
+            563636937,
+            563625715,
+            563841013
+          ],
+          "samples_ts": [
+            227.097,
+            227.101,
+            227.014
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:19:04Z",
+          "avg_ns": 13505800222,
+          "stddev_ns": 1516481,
+          "avg_ts": 37.909639,
+          "stddev_ts": 0.004231,
+          "samples_ns": [
+            13507517405,
+            13505188951,
+            13504694312
+          ],
+          "samples_ts": [
+            37.9048,
+            37.9114,
+            37.9127
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 185
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:20:05.538723+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:19:45Z\",\n    \"avg_ns\": 2398911080,\n    \"stddev_ns\": 219770,\n    \"avg_ts\": 213.430171,\n    \"stddev_ts\": 0.018555,\n    \"samples_ns\": [ 2399151468, 2398778166, 2398803608 ],\n    \"samples_ts\": [ 213.409, 213.442, 213.44 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:19:55Z\",\n    \"avg_ns\": 3349885592,\n    \"stddev_ns\": 829077,\n    \"avg_ts\": 38.210262,\n    \"stddev_ts\": 0.009457,\n    \"samples_ns\": [ 3349011261, 3350660439, 3349985076 ],\n    \"samples_ts\": [ 38.2202, 38.2014, 38.2091 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:19:45Z",
+          "avg_ns": 2398911080,
+          "stddev_ns": 219770,
+          "avg_ts": 213.430171,
+          "stddev_ts": 0.018555,
+          "samples_ns": [
+            2399151468,
+            2398778166,
+            2398803608
+          ],
+          "samples_ts": [
+            213.409,
+            213.442,
+            213.44
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:19:55Z",
+          "avg_ns": 3349885592,
+          "stddev_ns": 829077,
+          "avg_ts": 38.210262,
+          "stddev_ts": 0.009457,
+          "samples_ns": [
+            3349011261,
+            3350660439,
+            3349985076
+          ],
+          "samples_ts": [
+            38.2202,
+            38.2014,
+            38.2091
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 186
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:20:56.360760+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:20:06Z\",\n    \"avg_ns\": 2370335503,\n    \"stddev_ns\": 61553,\n    \"avg_ts\": 216.003177,\n    \"stddev_ts\": 0.003432,\n    \"samples_ns\": [ 2370292110, 2370359701, 2370354699 ],\n    \"samples_ts\": [ 216.007, 216.001, 216.001 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:20:15Z\",\n    \"avg_ns\": 13516110299,\n    \"stddev_ns\": 5552668,\n    \"avg_ts\": 37.880725,\n    \"stddev_ts\": 0.015559,\n    \"samples_ns\": [ 13510521269, 13521623392, 13516186237 ],\n    \"samples_ts\": [ 37.8964, 37.8653, 37.8805 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:20:06Z",
+          "avg_ns": 2370335503,
+          "stddev_ns": 61553,
+          "avg_ts": 216.003177,
+          "stddev_ts": 0.003432,
+          "samples_ns": [
+            2370292110,
+            2370359701,
+            2370354699
+          ],
+          "samples_ts": [
+            216.007,
+            216.001,
+            216.001
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:20:15Z",
+          "avg_ns": 13516110299,
+          "stddev_ns": 5552668,
+          "avg_ts": 37.880725,
+          "stddev_ts": 0.015559,
+          "samples_ns": [
+            13510521269,
+            13521623392,
+            13516186237
+          ],
+          "samples_ts": [
+            37.8964,
+            37.8653,
+            37.8805
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 187
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:21:09.374112+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:20:57Z\",\n    \"avg_ns\": 563849381,\n    \"stddev_ns\": 36401,\n    \"avg_ts\": 227.010979,\n    \"stddev_ts\": 0.011108,\n    \"samples_ns\": [ 563817764, 563868579, 563861801 ],\n    \"samples_ts\": [ 227.024, 227.003, 227.006 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:20:59Z\",\n    \"avg_ns\": 3323769778,\n    \"stddev_ns\": 1083681,\n    \"avg_ts\": 38.510492,\n    \"stddev_ts\": 0.012520,\n    \"samples_ns\": [ 3324878903, 3322720140, 3323710293 ],\n    \"samples_ts\": [ 38.4976, 38.5227, 38.5112 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:20:57Z",
+          "avg_ns": 563849381,
+          "stddev_ns": 36401,
+          "avg_ts": 227.010979,
+          "stddev_ts": 0.011108,
+          "samples_ns": [
+            563817764,
+            563868579,
+            563861801
+          ],
+          "samples_ts": [
+            227.024,
+            227.003,
+            227.006
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:20:59Z",
+          "avg_ns": 3323769778,
+          "stddev_ns": 1083681,
+          "avg_ts": 38.510492,
+          "stddev_ts": 0.01252,
+          "samples_ns": [
+            3324878903,
+            3322720140,
+            3323710293
+          ],
+          "samples_ts": [
+            38.4976,
+            38.5227,
+            38.5112
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 188
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:21:53.087414+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:21:10Z\",\n    \"avg_ns\": 563547166,\n    \"stddev_ns\": 18763,\n    \"avg_ts\": 227.132719,\n    \"stddev_ts\": 0.007562,\n    \"samples_ns\": [ 563530999, 563567741, 563542758 ],\n    \"samples_ts\": [ 227.139, 227.124, 227.134 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:21:12Z\",\n    \"avg_ns\": 13557106994,\n    \"stddev_ns\": 4292674,\n    \"avg_ts\": 37.766172,\n    \"stddev_ts\": 0.011953,\n    \"samples_ns\": [ 13561692932, 13556438951, 13553189100 ],\n    \"samples_ts\": [ 37.7534, 37.768, 37.7771 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:21:10Z",
+          "avg_ns": 563547166,
+          "stddev_ns": 18763,
+          "avg_ts": 227.132719,
+          "stddev_ts": 0.007562,
+          "samples_ns": [
+            563530999,
+            563567741,
+            563542758
+          ],
+          "samples_ts": [
+            227.139,
+            227.124,
+            227.134
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:21:12Z",
+          "avg_ns": 13557106994,
+          "stddev_ns": 4292674,
+          "avg_ts": 37.766172,
+          "stddev_ts": 0.011953,
+          "samples_ns": [
+            13561692932,
+            13556438951,
+            13553189100
+          ],
+          "samples_ts": [
+            37.7534,
+            37.768,
+            37.7771
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 189
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:22:14.045758+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:21:53Z\",\n    \"avg_ns\": 2551444093,\n    \"stddev_ns\": 253742,\n    \"avg_ts\": 200.670673,\n    \"stddev_ts\": 0.019556,\n    \"samples_ns\": [ 2551272569, 2551729275, 2551330436 ],\n    \"samples_ts\": [ 200.684, 200.648, 200.68 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:22:03Z\",\n    \"avg_ns\": 3319742872,\n    \"stddev_ns\": 366795,\n    \"avg_ts\": 38.557203,\n    \"stddev_ts\": 0.004207,\n    \"samples_ns\": [ 3319826691, 3320055857, 3319346069 ],\n    \"samples_ts\": [ 38.5562, 38.5536, 38.5618 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:21:53Z",
+          "avg_ns": 2551444093,
+          "stddev_ns": 253742,
+          "avg_ts": 200.670673,
+          "stddev_ts": 0.019556,
+          "samples_ns": [
+            2551272569,
+            2551729275,
+            2551330436
+          ],
+          "samples_ts": [
+            200.684,
+            200.648,
+            200.68
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:22:03Z",
+          "avg_ns": 3319742872,
+          "stddev_ns": 366795,
+          "avg_ts": 38.557203,
+          "stddev_ts": 0.004207,
+          "samples_ns": [
+            3319826691,
+            3320055857,
+            3319346069
+          ],
+          "samples_ts": [
+            38.5562,
+            38.5536,
+            38.5618
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 190
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:23:05.671418+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:22:14Z\",\n    \"avg_ns\": 2543950835,\n    \"stddev_ns\": 407965,\n    \"avg_ts\": 201.261754,\n    \"stddev_ts\": 0.031778,\n    \"samples_ns\": [ 2544357860, 2543939928, 2543554719 ],\n    \"samples_ts\": [ 201.23, 201.263, 201.293 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:22:24Z\",\n    \"avg_ns\": 13551267684,\n    \"stddev_ns\": 7336103,\n    \"avg_ts\": 37.782450,\n    \"stddev_ts\": 0.020456,\n    \"samples_ns\": [ 13543497168, 13552231859, 13558074025 ],\n    \"samples_ts\": [ 37.8041, 37.7798, 37.7635 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:22:14Z",
+          "avg_ns": 2543950835,
+          "stddev_ns": 407965,
+          "avg_ts": 201.261754,
+          "stddev_ts": 0.031778,
+          "samples_ns": [
+            2544357860,
+            2543939928,
+            2543554719
+          ],
+          "samples_ts": [
+            201.23,
+            201.263,
+            201.293
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:22:24Z",
+          "avg_ns": 13551267684,
+          "stddev_ns": 7336103,
+          "avg_ts": 37.78245,
+          "stddev_ts": 0.020456,
+          "samples_ns": [
+            13543497168,
+            13552231859,
+            13558074025
+          ],
+          "samples_ts": [
+            37.8041,
+            37.7798,
+            37.7635
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 191
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:23:18.664948+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:23:06Z\",\n    \"avg_ns\": 563129753,\n    \"stddev_ns\": 105877,\n    \"avg_ts\": 227.301084,\n    \"stddev_ts\": 0.042734,\n    \"samples_ns\": [ 563243088, 563033381, 563112790 ],\n    \"samples_ts\": [ 227.255, 227.34, 227.308 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:23:08Z\",\n    \"avg_ns\": 3317865230,\n    \"stddev_ns\": 663595,\n    \"avg_ts\": 38.579024,\n    \"stddev_ts\": 0.007716,\n    \"samples_ns\": [ 3318535523, 3317851625, 3317208542 ],\n    \"samples_ts\": [ 38.5712, 38.5792, 38.5867 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:23:06Z",
+          "avg_ns": 563129753,
+          "stddev_ns": 105877,
+          "avg_ts": 227.301084,
+          "stddev_ts": 0.042734,
+          "samples_ns": [
+            563243088,
+            563033381,
+            563112790
+          ],
+          "samples_ts": [
+            227.255,
+            227.34,
+            227.308
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:23:08Z",
+          "avg_ns": 3317865230,
+          "stddev_ns": 663595,
+          "avg_ts": 38.579024,
+          "stddev_ts": 0.007716,
+          "samples_ns": [
+            3318535523,
+            3317851625,
+            3317208542
+          ],
+          "samples_ts": [
+            38.5712,
+            38.5792,
+            38.5867
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 192
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:24:02.450465+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:23:19Z\",\n    \"avg_ns\": 563841781,\n    \"stddev_ns\": 90466,\n    \"avg_ts\": 227.014042,\n    \"stddev_ts\": 0.033819,\n    \"samples_ns\": [ 563934869, 563818849, 563771627 ],\n    \"samples_ts\": [ 226.977, 227.023, 227.042 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:23:21Z\",\n    \"avg_ns\": 13552453151,\n    \"stddev_ns\": 3685866,\n    \"avg_ts\": 37.779140,\n    \"stddev_ts\": 0.010266,\n    \"samples_ns\": [ 13548241786, 13555065595, 13554052074 ],\n    \"samples_ts\": [ 37.7909, 37.7719, 37.7747 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:23:19Z",
+          "avg_ns": 563841781,
+          "stddev_ns": 90466,
+          "avg_ts": 227.014042,
+          "stddev_ts": 0.033819,
+          "samples_ns": [
+            563934869,
+            563818849,
+            563771627
+          ],
+          "samples_ts": [
+            226.977,
+            227.023,
+            227.042
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:23:21Z",
+          "avg_ns": 13552453151,
+          "stddev_ns": 3685866,
+          "avg_ts": 37.77914,
+          "stddev_ts": 0.010266,
+          "samples_ns": [
+            13548241786,
+            13555065595,
+            13554052074
+          ],
+          "samples_ts": [
+            37.7909,
+            37.7719,
+            37.7747
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 193
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:24:22.616180+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:24:03Z\",\n    \"avg_ns\": 2343262684,\n    \"stddev_ns\": 838424,\n    \"avg_ts\": 218.498783,\n    \"stddev_ts\": 0.078164,\n    \"samples_ns\": [ 2342852886, 2342707975, 2344227191 ],\n    \"samples_ts\": [ 218.537, 218.551, 218.409 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:24:12Z\",\n    \"avg_ns\": 3322675711,\n    \"stddev_ns\": 1221938,\n    \"avg_ts\": 38.523173,\n    \"stddev_ts\": 0.014139,\n    \"samples_ns\": [ 3323374511, 3323384730, 3321267894 ],\n    \"samples_ts\": [ 38.5151, 38.515, 38.5395 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:24:03Z",
+          "avg_ns": 2343262684,
+          "stddev_ns": 838424,
+          "avg_ts": 218.498783,
+          "stddev_ts": 0.078164,
+          "samples_ns": [
+            2342852886,
+            2342707975,
+            2344227191
+          ],
+          "samples_ts": [
+            218.537,
+            218.551,
+            218.409
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:24:12Z",
+          "avg_ns": 3322675711,
+          "stddev_ns": 1221938,
+          "avg_ts": 38.523173,
+          "stddev_ts": 0.014139,
+          "samples_ns": [
+            3323374511,
+            3323384730,
+            3321267894
+          ],
+          "samples_ts": [
+            38.5151,
+            38.515,
+            38.5395
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 194
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:25:13.916206+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:24:23Z\",\n    \"avg_ns\": 2358878369,\n    \"stddev_ns\": 451374,\n    \"avg_ts\": 217.052316,\n    \"stddev_ts\": 0.041530,\n    \"samples_ns\": [ 2359381949, 2358742960, 2358510198 ],\n    \"samples_ts\": [ 217.006, 217.065, 217.086 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:24:32Z\",\n    \"avg_ns\": 13678625985,\n    \"stddev_ns\": 7863490,\n    \"avg_ts\": 37.430668,\n    \"stddev_ts\": 0.021512,\n    \"samples_ns\": [ 13687472151, 13675975893, 13672429911 ],\n    \"samples_ts\": [ 37.4065, 37.4379, 37.4476 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:24:23Z",
+          "avg_ns": 2358878369,
+          "stddev_ns": 451374,
+          "avg_ts": 217.052316,
+          "stddev_ts": 0.04153,
+          "samples_ns": [
+            2359381949,
+            2358742960,
+            2358510198
+          ],
+          "samples_ts": [
+            217.006,
+            217.065,
+            217.086
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:24:32Z",
+          "avg_ns": 13678625985,
+          "stddev_ns": 7863490,
+          "avg_ts": 37.430668,
+          "stddev_ts": 0.021512,
+          "samples_ns": [
+            13687472151,
+            13675975893,
+            13672429911
+          ],
+          "samples_ts": [
+            37.4065,
+            37.4379,
+            37.4476
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 195
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:25:26.968213+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:25:14Z\",\n    \"avg_ns\": 564539340,\n    \"stddev_ns\": 198447,\n    \"avg_ts\": 226.733553,\n    \"stddev_ts\": 0.079126,\n    \"samples_ns\": [ 564348413, 564527673, 564741935 ],\n    \"samples_ts\": [ 226.81, 226.738, 226.652 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:25:16Z\",\n    \"avg_ns\": 3323240266,\n    \"stddev_ns\": 560560,\n    \"avg_ts\": 38.516626,\n    \"stddev_ts\": 0.006497,\n    \"samples_ns\": [ 3323340627, 3322636304, 3323743867 ],\n    \"samples_ts\": [ 38.5155, 38.5236, 38.5108 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:25:14Z",
+          "avg_ns": 564539340,
+          "stddev_ns": 198447,
+          "avg_ts": 226.733553,
+          "stddev_ts": 0.079126,
+          "samples_ns": [
+            564348413,
+            564527673,
+            564741935
+          ],
+          "samples_ts": [
+            226.81,
+            226.738,
+            226.652
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:25:16Z",
+          "avg_ns": 3323240266,
+          "stddev_ns": 560560,
+          "avg_ts": 38.516626,
+          "stddev_ts": 0.006497,
+          "samples_ns": [
+            3323340627,
+            3322636304,
+            3323743867
+          ],
+          "samples_ts": [
+            38.5155,
+            38.5236,
+            38.5108
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 196
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:26:10.649997+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:25:27Z\",\n    \"avg_ns\": 563783942,\n    \"stddev_ns\": 104921,\n    \"avg_ts\": 227.037333,\n    \"stddev_ts\": 0.041154,\n    \"samples_ns\": [ 563761998, 563895332, 563694497 ],\n    \"samples_ts\": [ 227.046, 226.992, 227.073 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:25:29Z\",\n    \"avg_ns\": 13532061980,\n    \"stddev_ns\": 1801185,\n    \"avg_ts\": 37.836067,\n    \"stddev_ts\": 0.005036,\n    \"samples_ns\": [ 13530499022, 13531655124, 13534031794 ],\n    \"samples_ts\": [ 37.8404, 37.8372, 37.8306 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:25:27Z",
+          "avg_ns": 563783942,
+          "stddev_ns": 104921,
+          "avg_ts": 227.037333,
+          "stddev_ts": 0.041154,
+          "samples_ns": [
+            563761998,
+            563895332,
+            563694497
+          ],
+          "samples_ts": [
+            227.046,
+            226.992,
+            227.073
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:25:29Z",
+          "avg_ns": 13532061980,
+          "stddev_ns": 1801185,
+          "avg_ts": 37.836067,
+          "stddev_ts": 0.005036,
+          "samples_ns": [
+            13530499022,
+            13531655124,
+            13534031794
+          ],
+          "samples_ts": [
+            37.8404,
+            37.8372,
+            37.8306
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 197
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:26:31.052803+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:26:11Z\",\n    \"avg_ns\": 2402746245,\n    \"stddev_ns\": 347714,\n    \"avg_ts\": 213.089504,\n    \"stddev_ts\": 0.030216,\n    \"samples_ns\": [ 2402613079, 2403133453, 2402492205 ],\n    \"samples_ts\": [ 213.101, 213.055, 213.112 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:26:20Z\",\n    \"avg_ns\": 3319432286,\n    \"stddev_ns\": 1122756,\n    \"avg_ts\": 38.560814,\n    \"stddev_ts\": 0.013043,\n    \"samples_ns\": [ 3320556613, 3319429138, 3318311107 ],\n    \"samples_ts\": [ 38.5478, 38.5608, 38.5738 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:26:11Z",
+          "avg_ns": 2402746245,
+          "stddev_ns": 347714,
+          "avg_ts": 213.089504,
+          "stddev_ts": 0.030216,
+          "samples_ns": [
+            2402613079,
+            2403133453,
+            2402492205
+          ],
+          "samples_ts": [
+            213.101,
+            213.055,
+            213.112
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:26:20Z",
+          "avg_ns": 3319432286,
+          "stddev_ns": 1122756,
+          "avg_ts": 38.560814,
+          "stddev_ts": 0.013043,
+          "samples_ns": [
+            3320556613,
+            3319429138,
+            3318311107
+          ],
+          "samples_ts": [
+            38.5478,
+            38.5608,
+            38.5738
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 198
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:27:22.022339+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:26:31Z\",\n    \"avg_ns\": 2368973475,\n    \"stddev_ns\": 162721,\n    \"avg_ts\": 216.127368,\n    \"stddev_ts\": 0.013452,\n    \"samples_ns\": [ 2368934581, 2368849376, 2369136470 ],\n    \"samples_ts\": [ 216.131, 216.139, 216.112 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:26:41Z\",\n    \"avg_ns\": 13547462630,\n    \"stddev_ns\": 10347462,\n    \"avg_ts\": 37.793070,\n    \"stddev_ts\": 0.028852,\n    \"samples_ns\": [ 13559138849, 13543813659, 13539435384 ],\n    \"samples_ts\": [ 37.7605, 37.8032, 37.8155 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:26:31Z",
+          "avg_ns": 2368973475,
+          "stddev_ns": 162721,
+          "avg_ts": 216.127368,
+          "stddev_ts": 0.013452,
+          "samples_ns": [
+            2368934581,
+            2368849376,
+            2369136470
+          ],
+          "samples_ts": [
+            216.131,
+            216.139,
+            216.112
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:26:41Z",
+          "avg_ns": 13547462630,
+          "stddev_ns": 10347462,
+          "avg_ts": 37.79307,
+          "stddev_ts": 0.028852,
+          "samples_ns": [
+            13559138849,
+            13543813659,
+            13539435384
+          ],
+          "samples_ts": [
+            37.7605,
+            37.8032,
+            37.8155
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 199
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:27:35.085150+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:27:22Z\",\n    \"avg_ns\": 564249094,\n    \"stddev_ns\": 138369,\n    \"avg_ts\": 226.850173,\n    \"stddev_ts\": 0.053971,\n    \"samples_ns\": [ 564293750, 564355305, 564098229 ],\n    \"samples_ts\": [ 226.832, 226.807, 226.911 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:27:25Z\",\n    \"avg_ns\": 3326073054,\n    \"stddev_ns\": 1327832,\n    \"avg_ts\": 38.483825,\n    \"stddev_ts\": 0.015332,\n    \"samples_ns\": [ 3327568860, 3325045132, 3325605172 ],\n    \"samples_ts\": [ 38.4665, 38.4957, 38.4892 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:27:22Z",
+          "avg_ns": 564249094,
+          "stddev_ns": 138369,
+          "avg_ts": 226.850173,
+          "stddev_ts": 0.053971,
+          "samples_ns": [
+            564293750,
+            564355305,
+            564098229
+          ],
+          "samples_ts": [
+            226.832,
+            226.807,
+            226.911
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:27:25Z",
+          "avg_ns": 3326073054,
+          "stddev_ns": 1327832,
+          "avg_ts": 38.483825,
+          "stddev_ts": 0.015332,
+          "samples_ns": [
+            3327568860,
+            3325045132,
+            3325605172
+          ],
+          "samples_ts": [
+            38.4665,
+            38.4957,
+            38.4892
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 200
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:28:18.885851+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:27:35Z\",\n    \"avg_ns\": 563611347,\n    \"stddev_ns\": 84070,\n    \"avg_ts\": 227.106858,\n    \"stddev_ts\": 0.033875,\n    \"samples_ns\": [ 563535853, 563701946, 563596242 ],\n    \"samples_ts\": [ 227.137, 227.07, 227.113 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:27:38Z\",\n    \"avg_ns\": 13573259756,\n    \"stddev_ns\": 2246318,\n    \"avg_ts\": 37.721227,\n    \"stddev_ts\": 0.006226,\n    \"samples_ns\": [ 13575233619, 13570824827, 13573720824 ],\n    \"samples_ts\": [ 37.7157, 37.728, 37.7199 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:27:35Z",
+          "avg_ns": 563611347,
+          "stddev_ns": 84070,
+          "avg_ts": 227.106858,
+          "stddev_ts": 0.033875,
+          "samples_ns": [
+            563535853,
+            563701946,
+            563596242
+          ],
+          "samples_ts": [
+            227.137,
+            227.07,
+            227.113
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:27:38Z",
+          "avg_ns": 13573259756,
+          "stddev_ns": 2246318,
+          "avg_ts": 37.721227,
+          "stddev_ts": 0.006226,
+          "samples_ns": [
+            13575233619,
+            13570824827,
+            13573720824
+          ],
+          "samples_ts": [
+            37.7157,
+            37.728,
+            37.7199
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 201
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:28:40.062128+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:28:19Z\",\n    \"avg_ns\": 2601409603,\n    \"stddev_ns\": 663425,\n    \"avg_ts\": 196.816380,\n    \"stddev_ts\": 0.050050,\n    \"samples_ns\": [ 2600664974, 2601929152, 2601634684 ],\n    \"samples_ts\": [ 196.873, 196.777, 196.799 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:28:30Z\",\n    \"avg_ns\": 3313554524,\n    \"stddev_ns\": 687786,\n    \"avg_ts\": 38.629213,\n    \"stddev_ts\": 0.007962,\n    \"samples_ns\": [ 3313553403, 3314238037, 3312872134 ],\n    \"samples_ts\": [ 38.6292, 38.6212, 38.6372 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:28:19Z",
+          "avg_ns": 2601409603,
+          "stddev_ns": 663425,
+          "avg_ts": 196.81638,
+          "stddev_ts": 0.05005,
+          "samples_ns": [
+            2600664974,
+            2601929152,
+            2601634684
+          ],
+          "samples_ts": [
+            196.873,
+            196.777,
+            196.799
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:28:30Z",
+          "avg_ns": 3313554524,
+          "stddev_ns": 687786,
+          "avg_ts": 38.629213,
+          "stddev_ts": 0.007962,
+          "samples_ns": [
+            3313553403,
+            3314238037,
+            3312872134
+          ],
+          "samples_ts": [
+            38.6292,
+            38.6212,
+            38.6372
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 202
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:29:31.941846+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:28:40Z\",\n    \"avg_ns\": 2603342655,\n    \"stddev_ns\": 1051571,\n    \"avg_ts\": 196.670252,\n    \"stddev_ts\": 0.079338,\n    \"samples_ns\": [ 2604487537, 2602423610, 2603116819 ],\n    \"samples_ts\": [ 196.584, 196.74, 196.687 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:28:51Z\",\n    \"avg_ns\": 13534911856,\n    \"stddev_ns\": 3585314,\n    \"avg_ts\": 37.828102,\n    \"stddev_ts\": 0.010020,\n    \"samples_ns\": [ 13538473080, 13534959560, 13531302928 ],\n    \"samples_ts\": [ 37.8181, 37.828, 37.8382 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:28:40Z",
+          "avg_ns": 2603342655,
+          "stddev_ns": 1051571,
+          "avg_ts": 196.670252,
+          "stddev_ts": 0.079338,
+          "samples_ns": [
+            2604487537,
+            2602423610,
+            2603116819
+          ],
+          "samples_ts": [
+            196.584,
+            196.74,
+            196.687
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:28:51Z",
+          "avg_ns": 13534911856,
+          "stddev_ns": 3585314,
+          "avg_ts": 37.828102,
+          "stddev_ts": 0.01002,
+          "samples_ns": [
+            13538473080,
+            13534959560,
+            13531302928
+          ],
+          "samples_ts": [
+            37.8181,
+            37.828,
+            37.8382
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 203
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:29:44.947285+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:29:32Z\",\n    \"avg_ns\": 563743968,\n    \"stddev_ns\": 180906,\n    \"avg_ts\": 227.053443,\n    \"stddev_ts\": 0.072862,\n    \"samples_ns\": [ 563745863, 563923920, 563562121 ],\n    \"samples_ts\": [ 227.053, 226.981, 227.127 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:29:34Z\",\n    \"avg_ns\": 3307000837,\n    \"stddev_ns\": 410842,\n    \"avg_ts\": 38.705766,\n    \"stddev_ts\": 0.004762,\n    \"samples_ns\": [ 3307307205, 3307156018, 3306539289 ],\n    \"samples_ts\": [ 38.7022, 38.7039, 38.7112 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:29:32Z",
+          "avg_ns": 563743968,
+          "stddev_ns": 180906,
+          "avg_ts": 227.053443,
+          "stddev_ts": 0.072862,
+          "samples_ns": [
+            563745863,
+            563923920,
+            563562121
+          ],
+          "samples_ts": [
+            227.053,
+            226.981,
+            227.127
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:29:34Z",
+          "avg_ns": 3307000837,
+          "stddev_ns": 410842,
+          "avg_ts": 38.705766,
+          "stddev_ts": 0.004762,
+          "samples_ns": [
+            3307307205,
+            3307156018,
+            3306539289
+          ],
+          "samples_ts": [
+            38.7022,
+            38.7039,
+            38.7112
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 204
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:30:28.492729+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:29:45Z\",\n    \"avg_ns\": 563479699,\n    \"stddev_ns\": 120352,\n    \"avg_ts\": 227.159920,\n    \"stddev_ts\": 0.046588,\n    \"samples_ns\": [ 563411156, 563613139, 563414804 ],\n    \"samples_ts\": [ 227.188, 227.106, 227.186 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:29:47Z\",\n    \"avg_ns\": 13488554276,\n    \"stddev_ns\": 20803683,\n    \"avg_ts\": 37.958168,\n    \"stddev_ts\": 0.058515,\n    \"samples_ns\": [ 13470291049, 13484172669, 13511199112 ],\n    \"samples_ts\": [ 38.0096, 37.9704, 37.8945 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:29:45Z",
+          "avg_ns": 563479699,
+          "stddev_ns": 120352,
+          "avg_ts": 227.15992,
+          "stddev_ts": 0.046588,
+          "samples_ns": [
+            563411156,
+            563613139,
+            563414804
+          ],
+          "samples_ts": [
+            227.188,
+            227.106,
+            227.186
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:29:47Z",
+          "avg_ns": 13488554276,
+          "stddev_ns": 20803683,
+          "avg_ts": 37.958168,
+          "stddev_ts": 0.058515,
+          "samples_ns": [
+            13470291049,
+            13484172669,
+            13511199112
+          ],
+          "samples_ts": [
+            38.0096,
+            37.9704,
+            37.8945
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 205
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:30:48.645681+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:30:29Z\",\n    \"avg_ns\": 2346763037,\n    \"stddev_ns\": 254182,\n    \"avg_ts\": 218.172860,\n    \"stddev_ts\": 0.023629,\n    \"samples_ns\": [ 2346582981, 2347053798, 2346652332 ],\n    \"samples_ts\": [ 218.19, 218.146, 218.183 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:30:38Z\",\n    \"avg_ns\": 3312684911,\n    \"stddev_ns\": 1748295,\n    \"avg_ts\": 38.639360,\n    \"stddev_ts\": 0.020376,\n    \"samples_ns\": [ 3312086537, 3314652836, 3311315361 ],\n    \"samples_ts\": [ 38.6463, 38.6164, 38.6553 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:30:29Z",
+          "avg_ns": 2346763037,
+          "stddev_ns": 254182,
+          "avg_ts": 218.17286,
+          "stddev_ts": 0.023629,
+          "samples_ns": [
+            2346582981,
+            2347053798,
+            2346652332
+          ],
+          "samples_ts": [
+            218.19,
+            218.146,
+            218.183
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:30:38Z",
+          "avg_ns": 3312684911,
+          "stddev_ns": 1748295,
+          "avg_ts": 38.63936,
+          "stddev_ts": 0.020376,
+          "samples_ns": [
+            3312086537,
+            3314652836,
+            3311315361
+          ],
+          "samples_ts": [
+            38.6463,
+            38.6164,
+            38.6553
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 206
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:31:39.428000+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:30:49Z\",\n    \"avg_ns\": 2366892996,\n    \"stddev_ns\": 225217,\n    \"avg_ts\": 216.317343,\n    \"stddev_ts\": 0.020584,\n    \"samples_ns\": [ 2366942361, 2366647191, 2367089436 ],\n    \"samples_ts\": [ 216.313, 216.34, 216.299 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:30:58Z\",\n    \"avg_ns\": 13495526351,\n    \"stddev_ns\": 6264885,\n    \"avg_ts\": 37.938503,\n    \"stddev_ts\": 0.017609,\n    \"samples_ns\": [ 13500626837, 13497415770, 13488536448 ],\n    \"samples_ts\": [ 37.9242, 37.9332, 37.9582 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:30:49Z",
+          "avg_ns": 2366892996,
+          "stddev_ns": 225217,
+          "avg_ts": 216.317343,
+          "stddev_ts": 0.020584,
+          "samples_ns": [
+            2366942361,
+            2366647191,
+            2367089436
+          ],
+          "samples_ts": [
+            216.313,
+            216.34,
+            216.299
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:30:58Z",
+          "avg_ns": 13495526351,
+          "stddev_ns": 6264885,
+          "avg_ts": 37.938503,
+          "stddev_ts": 0.017609,
+          "samples_ns": [
+            13500626837,
+            13497415770,
+            13488536448
+          ],
+          "samples_ts": [
+            37.9242,
+            37.9332,
+            37.9582
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 207
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:31:52.437408+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:31:40Z\",\n    \"avg_ns\": 563656600,\n    \"stddev_ns\": 28592,\n    \"avg_ts\": 227.088621,\n    \"stddev_ts\": 0.006419,\n    \"samples_ns\": [ 563657131, 563672261, 563640409 ],\n    \"samples_ts\": [ 227.088, 227.082, 227.095 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:31:42Z\",\n    \"avg_ns\": 3310183779,\n    \"stddev_ns\": 609315,\n    \"avg_ts\": 38.668549,\n    \"stddev_ts\": 0.007086,\n    \"samples_ns\": [ 3309517898, 3310328552, 3310704888 ],\n    \"samples_ts\": [ 38.6763, 38.6669, 38.6625 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:31:40Z",
+          "avg_ns": 563656600,
+          "stddev_ns": 28592,
+          "avg_ts": 227.088621,
+          "stddev_ts": 0.006419,
+          "samples_ns": [
+            563657131,
+            563672261,
+            563640409
+          ],
+          "samples_ts": [
+            227.088,
+            227.082,
+            227.095
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:31:42Z",
+          "avg_ns": 3310183779,
+          "stddev_ns": 609315,
+          "avg_ts": 38.668549,
+          "stddev_ts": 0.007086,
+          "samples_ns": [
+            3309517898,
+            3310328552,
+            3310704888
+          ],
+          "samples_ts": [
+            38.6763,
+            38.6669,
+            38.6625
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 208
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:32:36.170036+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:31:53Z\",\n    \"avg_ns\": 564246593,\n    \"stddev_ns\": 168278,\n    \"avg_ts\": 226.851183,\n    \"stddev_ts\": 0.066974,\n    \"samples_ns\": [ 564424064, 564093580, 564222136 ],\n    \"samples_ts\": [ 226.78, 226.913, 226.861 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:31:55Z\",\n    \"avg_ns\": 13550245227,\n    \"stddev_ns\": 11752600,\n    \"avg_ts\": 37.785313,\n    \"stddev_ts\": 0.032760,\n    \"samples_ns\": [ 13546491039, 13540828377, 13563416265 ],\n    \"samples_ts\": [ 37.7958, 37.8116, 37.7486 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:31:53Z",
+          "avg_ns": 564246593,
+          "stddev_ns": 168278,
+          "avg_ts": 226.851183,
+          "stddev_ts": 0.066974,
+          "samples_ns": [
+            564424064,
+            564093580,
+            564222136
+          ],
+          "samples_ts": [
+            226.78,
+            226.913,
+            226.861
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:31:55Z",
+          "avg_ns": 13550245227,
+          "stddev_ns": 11752600,
+          "avg_ts": 37.785313,
+          "stddev_ts": 0.03276,
+          "samples_ns": [
+            13546491039,
+            13540828377,
+            13563416265
+          ],
+          "samples_ts": [
+            37.7958,
+            37.8116,
+            37.7486
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 209
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:32:56.528877+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:32:36Z\",\n    \"avg_ns\": 2398032224,\n    \"stddev_ns\": 334675,\n    \"avg_ts\": 213.508393,\n    \"stddev_ts\": 0.029799,\n    \"samples_ns\": [ 2398327653, 2398100262, 2397668757 ],\n    \"samples_ts\": [ 213.482, 213.502, 213.541 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:32:46Z\",\n    \"avg_ns\": 3312094621,\n    \"stddev_ns\": 1861039,\n    \"avg_ts\": 38.646247,\n    \"stddev_ts\": 0.021694,\n    \"samples_ns\": [ 3310247384, 3312070807, 3313965674 ],\n    \"samples_ts\": [ 38.6678, 38.6465, 38.6244 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:32:36Z",
+          "avg_ns": 2398032224,
+          "stddev_ns": 334675,
+          "avg_ts": 213.508393,
+          "stddev_ts": 0.029799,
+          "samples_ns": [
+            2398327653,
+            2398100262,
+            2397668757
+          ],
+          "samples_ts": [
+            213.482,
+            213.502,
+            213.541
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:32:46Z",
+          "avg_ns": 3312094621,
+          "stddev_ns": 1861039,
+          "avg_ts": 38.646247,
+          "stddev_ts": 0.021694,
+          "samples_ns": [
+            3310247384,
+            3312070807,
+            3313965674
+          ],
+          "samples_ts": [
+            38.6678,
+            38.6465,
+            38.6244
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 210
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:33:47.779848+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:32:57Z\",\n    \"avg_ns\": 2400175196,\n    \"stddev_ns\": 211828,\n    \"avg_ts\": 213.317763,\n    \"stddev_ts\": 0.018316,\n    \"samples_ns\": [ 2400370390, 2399959719, 2400195480 ],\n    \"samples_ts\": [ 213.3, 213.337, 213.316 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:33:06Z\",\n    \"avg_ns\": 13607104889,\n    \"stddev_ns\": 7503836,\n    \"avg_ts\": 37.627409,\n    \"stddev_ts\": 0.020749,\n    \"samples_ns\": [ 13599764648, 13614762266, 13606787753 ],\n    \"samples_ts\": [ 37.6477, 37.6062, 37.6283 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:32:57Z",
+          "avg_ns": 2400175196,
+          "stddev_ns": 211828,
+          "avg_ts": 213.317763,
+          "stddev_ts": 0.018316,
+          "samples_ns": [
+            2400370390,
+            2399959719,
+            2400195480
+          ],
+          "samples_ts": [
+            213.3,
+            213.337,
+            213.316
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:33:06Z",
+          "avg_ns": 13607104889,
+          "stddev_ns": 7503836,
+          "avg_ts": 37.627409,
+          "stddev_ts": 0.020749,
+          "samples_ns": [
+            13599764648,
+            13614762266,
+            13606787753
+          ],
+          "samples_ts": [
+            37.6477,
+            37.6062,
+            37.6283
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 211
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:34:00.867971+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:33:48Z\",\n    \"avg_ns\": 563897201,\n    \"stddev_ns\": 116121,\n    \"avg_ts\": 226.991734,\n    \"stddev_ts\": 0.045750,\n    \"samples_ns\": [ 563828553, 564028406, 563834645 ],\n    \"samples_ts\": [ 227.019, 226.939, 227.017 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:33:50Z\",\n    \"avg_ns\": 3320355980,\n    \"stddev_ns\": 1718568,\n    \"avg_ts\": 38.550090,\n    \"stddev_ts\": 0.019959,\n    \"samples_ns\": [ 3321445837, 3318374863, 3321247240 ],\n    \"samples_ts\": [ 38.5374, 38.5731, 38.5397 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:33:48Z",
+          "avg_ns": 563897201,
+          "stddev_ns": 116121,
+          "avg_ts": 226.991734,
+          "stddev_ts": 0.04575,
+          "samples_ns": [
+            563828553,
+            564028406,
+            563834645
+          ],
+          "samples_ts": [
+            227.019,
+            226.939,
+            227.017
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:33:50Z",
+          "avg_ns": 3320355980,
+          "stddev_ns": 1718568,
+          "avg_ts": 38.55009,
+          "stddev_ts": 0.019959,
+          "samples_ns": [
+            3321445837,
+            3318374863,
+            3321247240
+          ],
+          "samples_ts": [
+            38.5374,
+            38.5731,
+            38.5397
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 212
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:34:44.595872+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:34:01Z\",\n    \"avg_ns\": 563754205,\n    \"stddev_ns\": 62835,\n    \"avg_ts\": 227.049306,\n    \"stddev_ts\": 0.023430,\n    \"samples_ns\": [ 563707477, 563819366, 563735773 ],\n    \"samples_ts\": [ 227.068, 227.023, 227.057 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:34:03Z\",\n    \"avg_ns\": 13548232880,\n    \"stddev_ns\": 1329918,\n    \"avg_ts\": 37.790907,\n    \"stddev_ts\": 0.003696,\n    \"samples_ns\": [ 13546737517, 13548701187, 13549259937 ],\n    \"samples_ts\": [ 37.7951, 37.7896, 37.788 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:34:01Z",
+          "avg_ns": 563754205,
+          "stddev_ns": 62835,
+          "avg_ts": 227.049306,
+          "stddev_ts": 0.02343,
+          "samples_ns": [
+            563707477,
+            563819366,
+            563735773
+          ],
+          "samples_ts": [
+            227.068,
+            227.023,
+            227.057
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:34:03Z",
+          "avg_ns": 13548232880,
+          "stddev_ns": 1329918,
+          "avg_ts": 37.790907,
+          "stddev_ts": 0.003696,
+          "samples_ns": [
+            13546737517,
+            13548701187,
+            13549259937
+          ],
+          "samples_ts": [
+            37.7951,
+            37.7896,
+            37.788
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 213
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:35:05.601684+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:34:45Z\",\n    \"avg_ns\": 2545117084,\n    \"stddev_ns\": 177455,\n    \"avg_ts\": 201.169528,\n    \"stddev_ts\": 0.013448,\n    \"samples_ns\": [ 2545162203, 2545260110, 2544928940 ],\n    \"samples_ts\": [ 201.166, 201.158, 201.184 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:34:55Z\",\n    \"avg_ns\": 3323541646,\n    \"stddev_ns\": 625600,\n    \"avg_ts\": 38.513133,\n    \"stddev_ts\": 0.007187,\n    \"samples_ns\": [ 3323403597, 3323002037, 3324219306 ],\n    \"samples_ts\": [ 38.5147, 38.5194, 38.5053 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:34:45Z",
+          "avg_ns": 2545117084,
+          "stddev_ns": 177455,
+          "avg_ts": 201.169528,
+          "stddev_ts": 0.013448,
+          "samples_ns": [
+            2545162203,
+            2545260110,
+            2544928940
+          ],
+          "samples_ts": [
+            201.166,
+            201.158,
+            201.184
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:34:55Z",
+          "avg_ns": 3323541646,
+          "stddev_ns": 625600,
+          "avg_ts": 38.513133,
+          "stddev_ts": 0.007187,
+          "samples_ns": [
+            3323403597,
+            3323002037,
+            3324219306
+          ],
+          "samples_ts": [
+            38.5147,
+            38.5194,
+            38.5053
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 214
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:35:57.257613+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:35:06Z\",\n    \"avg_ns\": 2557161312,\n    \"stddev_ns\": 483013,\n    \"avg_ts\": 200.222023,\n    \"stddev_ts\": 0.037819,\n    \"samples_ns\": [ 2557650662, 2557148380, 2556684894 ],\n    \"samples_ts\": [ 200.184, 200.223, 200.259 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:35:16Z\",\n    \"avg_ns\": 13530268650,\n    \"stddev_ns\": 459768,\n    \"avg_ts\": 37.841082,\n    \"stddev_ts\": 0.001244,\n    \"samples_ns\": [ 13530735191, 13529849338, 13530221422 ],\n    \"samples_ts\": [ 37.8398, 37.8423, 37.8412 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:35:06Z",
+          "avg_ns": 2557161312,
+          "stddev_ns": 483013,
+          "avg_ts": 200.222023,
+          "stddev_ts": 0.037819,
+          "samples_ns": [
+            2557650662,
+            2557148380,
+            2556684894
+          ],
+          "samples_ts": [
+            200.184,
+            200.223,
+            200.259
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:35:16Z",
+          "avg_ns": 13530268650,
+          "stddev_ns": 459768,
+          "avg_ts": 37.841082,
+          "stddev_ts": 0.001244,
+          "samples_ns": [
+            13530735191,
+            13529849338,
+            13530221422
+          ],
+          "samples_ts": [
+            37.8398,
+            37.8423,
+            37.8412
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 215
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:36:10.385022+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:35:57Z\",\n    \"avg_ns\": 399063566,\n    \"stddev_ns\": 2942920,\n    \"avg_ts\": 320.762583,\n    \"stddev_ts\": 2.375447,\n    \"samples_ns\": [ 395669116, 400897404, 400624179 ],\n    \"samples_ts\": [ 323.503, 319.284, 319.501 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:35:59Z\",\n    \"avg_ns\": 3566332646,\n    \"stddev_ns\": 1081461,\n    \"avg_ts\": 35.891214,\n    \"stddev_ts\": 0.010884,\n    \"samples_ns\": [ 3567350193, 3565196969, 3566450776 ],\n    \"samples_ts\": [ 35.881, 35.9026, 35.89 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:35:57Z",
+          "avg_ns": 399063566,
+          "stddev_ns": 2942920,
+          "avg_ts": 320.762583,
+          "stddev_ts": 2.375447,
+          "samples_ns": [
+            395669116,
+            400897404,
+            400624179
+          ],
+          "samples_ts": [
+            323.503,
+            319.284,
+            319.501
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:35:59Z",
+          "avg_ns": 3566332646,
+          "stddev_ns": 1081461,
+          "avg_ts": 35.891214,
+          "stddev_ts": 0.010884,
+          "samples_ns": [
+            3567350193,
+            3565196969,
+            3566450776
+          ],
+          "samples_ts": [
+            35.881,
+            35.9026,
+            35.89
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 216
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:36:56.296033+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:36:11Z\",\n    \"avg_ns\": 401043756,\n    \"stddev_ns\": 545632,\n    \"avg_ts\": 319.167562,\n    \"stddev_ts\": 0.434284,\n    \"samples_ns\": [ 401337407, 400414602, 401379260 ],\n    \"samples_ts\": [ 318.934, 319.669, 318.9 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:36:12Z\",\n    \"avg_ns\": 14489742559,\n    \"stddev_ns\": 3754896,\n    \"avg_ts\": 35.335343,\n    \"stddev_ts\": 0.009156,\n    \"samples_ns\": [ 14493853374, 14488880946, 14486493357 ],\n    \"samples_ts\": [ 35.3253, 35.3374, 35.3433 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:36:11Z",
+          "avg_ns": 401043756,
+          "stddev_ns": 545632,
+          "avg_ts": 319.167562,
+          "stddev_ts": 0.434284,
+          "samples_ns": [
+            401337407,
+            400414602,
+            401379260
+          ],
+          "samples_ts": [
+            318.934,
+            319.669,
+            318.9
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:36:12Z",
+          "avg_ns": 14489742559,
+          "stddev_ns": 3754896,
+          "avg_ts": 35.335343,
+          "stddev_ts": 0.009156,
+          "samples_ns": [
+            14493853374,
+            14488880946,
+            14486493357
+          ],
+          "samples_ts": [
+            35.3253,
+            35.3374,
+            35.3433
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 217
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:37:14.403456+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:36:57Z\",\n    \"avg_ns\": 1651252621,\n    \"stddev_ns\": 4088233,\n    \"avg_ts\": 310.068904,\n    \"stddev_ts\": 0.766864,\n    \"samples_ns\": [ 1655814539, 1647921098, 1650022227 ],\n    \"samples_ts\": [ 309.213, 310.694, 310.299 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:37:03Z\",\n    \"avg_ns\": 3563157647,\n    \"stddev_ns\": 1254402,\n    \"avg_ts\": 35.923196,\n    \"stddev_ts\": 0.012630,\n    \"samples_ns\": [ 3564602396, 3562368282, 3562502264 ],\n    \"samples_ts\": [ 35.9086, 35.9312, 35.9298 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:36:57Z",
+          "avg_ns": 1651252621,
+          "stddev_ns": 4088233,
+          "avg_ts": 310.068904,
+          "stddev_ts": 0.766864,
+          "samples_ns": [
+            1655814539,
+            1647921098,
+            1650022227
+          ],
+          "samples_ts": [
+            309.213,
+            310.694,
+            310.299
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:37:03Z",
+          "avg_ns": 3563157647,
+          "stddev_ns": 1254402,
+          "avg_ts": 35.923196,
+          "stddev_ts": 0.01263,
+          "samples_ns": [
+            3564602396,
+            3562368282,
+            3562502264
+          ],
+          "samples_ts": [
+            35.9086,
+            35.9312,
+            35.9298
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 218
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:38:05.302760+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:37:15Z\",\n    \"avg_ns\": 1648509335,\n    \"stddev_ns\": 321923,\n    \"avg_ts\": 310.583629,\n    \"stddev_ts\": 0.059672,\n    \"samples_ns\": [ 1648873345, 1648358322, 1648296340 ],\n    \"samples_ts\": [ 310.515, 310.612, 310.624 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:37:21Z\",\n    \"avg_ns\": 14490346065,\n    \"stddev_ns\": 8673311,\n    \"avg_ts\": 35.333878,\n    \"stddev_ts\": 0.021140,\n    \"samples_ns\": [ 14500314505, 14484534501, 14486189190 ],\n    \"samples_ts\": [ 35.3096, 35.348, 35.344 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:37:15Z",
+          "avg_ns": 1648509335,
+          "stddev_ns": 321923,
+          "avg_ts": 310.583629,
+          "stddev_ts": 0.059672,
+          "samples_ns": [
+            1648873345,
+            1648358322,
+            1648296340
+          ],
+          "samples_ts": [
+            310.515,
+            310.612,
+            310.624
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:37:21Z",
+          "avg_ns": 14490346065,
+          "stddev_ns": 8673311,
+          "avg_ts": 35.333878,
+          "stddev_ts": 0.02114,
+          "samples_ns": [
+            14500314505,
+            14484534501,
+            14486189190
+          ],
+          "samples_ts": [
+            35.3096,
+            35.348,
+            35.344
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 219
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:38:18.449603+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:38:06Z\",\n    \"avg_ns\": 400120720,\n    \"stddev_ns\": 91474,\n    \"avg_ts\": 319.903463,\n    \"stddev_ts\": 0.069558,\n    \"samples_ns\": [ 400185616, 400154672, 400021874 ],\n    \"samples_ts\": [ 319.852, 319.876, 319.983 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:38:07Z\",\n    \"avg_ns\": 3570527238,\n    \"stddev_ns\": 2169347,\n    \"avg_ts\": 35.849056,\n    \"stddev_ts\": 0.021783,\n    \"samples_ns\": [ 3572555232, 3568239860, 3570786622 ],\n    \"samples_ts\": [ 35.8287, 35.872, 35.8464 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:38:06Z",
+          "avg_ns": 400120720,
+          "stddev_ns": 91474,
+          "avg_ts": 319.903463,
+          "stddev_ts": 0.069558,
+          "samples_ns": [
+            400185616,
+            400154672,
+            400021874
+          ],
+          "samples_ts": [
+            319.852,
+            319.876,
+            319.983
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:38:07Z",
+          "avg_ns": 3570527238,
+          "stddev_ns": 2169347,
+          "avg_ts": 35.849056,
+          "stddev_ts": 0.021783,
+          "samples_ns": [
+            3572555232,
+            3568239860,
+            3570786622
+          ],
+          "samples_ts": [
+            35.8287,
+            35.872,
+            35.8464
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 220
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:39:04.172493+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:38:19Z\",\n    \"avg_ns\": 399641661,\n    \"stddev_ns\": 425384,\n    \"avg_ts\": 320.287170,\n    \"stddev_ts\": 0.340786,\n    \"samples_ns\": [ 399530352, 400111634, 399282997 ],\n    \"samples_ts\": [ 320.376, 319.911, 320.575 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:38:20Z\",\n    \"avg_ns\": 14428945320,\n    \"stddev_ns\": 81378943,\n    \"avg_ts\": 35.484979,\n    \"stddev_ts\": 0.199487,\n    \"samples_ns\": [ 14384459222, 14379506724, 14522870015 ],\n    \"samples_ts\": [ 35.594, 35.6062, 35.2547 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:38:19Z",
+          "avg_ns": 399641661,
+          "stddev_ns": 425384,
+          "avg_ts": 320.28717,
+          "stddev_ts": 0.340786,
+          "samples_ns": [
+            399530352,
+            400111634,
+            399282997
+          ],
+          "samples_ts": [
+            320.376,
+            319.911,
+            320.575
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:38:20Z",
+          "avg_ns": 14428945320,
+          "stddev_ns": 81378943,
+          "avg_ts": 35.484979,
+          "stddev_ts": 0.199487,
+          "samples_ns": [
+            14384459222,
+            14379506724,
+            14522870015
+          ],
+          "samples_ts": [
+            35.594,
+            35.6062,
+            35.2547
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 221
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:39:22.703850+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:39:04Z\",\n    \"avg_ns\": 1733523006,\n    \"stddev_ns\": 3333272,\n    \"avg_ts\": 295.353023,\n    \"stddev_ts\": 0.567384,\n    \"samples_ns\": [ 1737237701, 1730795013, 1732536306 ],\n    \"samples_ts\": [ 294.721, 295.818, 295.521 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:39:11Z\",\n    \"avg_ns\": 3577515389,\n    \"stddev_ns\": 1658065,\n    \"avg_ts\": 35.779027,\n    \"stddev_ts\": 0.016586,\n    \"samples_ns\": [ 3575629694, 3578171364, 3578745109 ],\n    \"samples_ts\": [ 35.7979, 35.7725, 35.7667 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:39:04Z",
+          "avg_ns": 1733523006,
+          "stddev_ns": 3333272,
+          "avg_ts": 295.353023,
+          "stddev_ts": 0.567384,
+          "samples_ns": [
+            1737237701,
+            1730795013,
+            1732536306
+          ],
+          "samples_ts": [
+            294.721,
+            295.818,
+            295.521
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:39:11Z",
+          "avg_ns": 3577515389,
+          "stddev_ns": 1658065,
+          "avg_ts": 35.779027,
+          "stddev_ts": 0.016586,
+          "samples_ns": [
+            3575629694,
+            3578171364,
+            3578745109
+          ],
+          "samples_ts": [
+            35.7979,
+            35.7725,
+            35.7667
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 222
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:40:13.239644+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:39:23Z\",\n    \"avg_ns\": 1688562276,\n    \"stddev_ns\": 1320434,\n    \"avg_ts\": 303.216657,\n    \"stddev_ts\": 0.237217,\n    \"samples_ns\": [ 1689246714, 1689399975, 1687040139 ],\n    \"samples_ts\": [ 303.094, 303.066, 303.49 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:39:30Z\",\n    \"avg_ns\": 14316249628,\n    \"stddev_ns\": 7518541,\n    \"avg_ts\": 35.763563,\n    \"stddev_ts\": 0.018779,\n    \"samples_ns\": [ 14309899697, 14324551702, 14314297485 ],\n    \"samples_ts\": [ 35.7794, 35.7428, 35.7684 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:39:23Z",
+          "avg_ns": 1688562276,
+          "stddev_ns": 1320434,
+          "avg_ts": 303.216657,
+          "stddev_ts": 0.237217,
+          "samples_ns": [
+            1689246714,
+            1689399975,
+            1687040139
+          ],
+          "samples_ts": [
+            303.094,
+            303.066,
+            303.49
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:39:30Z",
+          "avg_ns": 14316249628,
+          "stddev_ns": 7518541,
+          "avg_ts": 35.763563,
+          "stddev_ts": 0.018779,
+          "samples_ns": [
+            14309899697,
+            14324551702,
+            14314297485
+          ],
+          "samples_ts": [
+            35.7794,
+            35.7428,
+            35.7684
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 223
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:40:26.344805+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:40:14Z\",\n    \"avg_ns\": 401841795,\n    \"stddev_ns\": 976034,\n    \"avg_ts\": 318.534570,\n    \"stddev_ts\": 0.773656,\n    \"samples_ns\": [ 400871974, 402823927, 401829484 ],\n    \"samples_ts\": [ 319.304, 317.757, 318.543 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:40:15Z\",\n    \"avg_ns\": 3539400962,\n    \"stddev_ns\": 2098679,\n    \"avg_ts\": 36.164320,\n    \"stddev_ts\": 0.021430,\n    \"samples_ns\": [ 3541259188, 3537127371, 3539816329 ],\n    \"samples_ts\": [ 36.1453, 36.1876, 36.1601 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:40:14Z",
+          "avg_ns": 401841795,
+          "stddev_ns": 976034,
+          "avg_ts": 318.53457,
+          "stddev_ts": 0.773656,
+          "samples_ns": [
+            400871974,
+            402823927,
+            401829484
+          ],
+          "samples_ts": [
+            319.304,
+            317.757,
+            318.543
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:40:15Z",
+          "avg_ns": 3539400962,
+          "stddev_ns": 2098679,
+          "avg_ts": 36.16432,
+          "stddev_ts": 0.02143,
+          "samples_ns": [
+            3541259188,
+            3537127371,
+            3539816329
+          ],
+          "samples_ts": [
+            36.1453,
+            36.1876,
+            36.1601
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 224
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:41:11.941474+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:40:27Z\",\n    \"avg_ns\": 405714388,\n    \"stddev_ns\": 434489,\n    \"avg_ts\": 315.493119,\n    \"stddev_ts\": 0.337914,\n    \"samples_ns\": [ 405263056, 405750302, 406129806 ],\n    \"samples_ts\": [ 315.844, 315.465, 315.17 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:40:28Z\",\n    \"avg_ns\": 14379264357,\n    \"stddev_ns\": 5314077,\n    \"avg_ts\": 35.606832,\n    \"stddev_ts\": 0.013153,\n    \"samples_ns\": [ 14376276292, 14376118493, 14385398287 ],\n    \"samples_ts\": [ 35.6142, 35.6146, 35.5916 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:40:27Z",
+          "avg_ns": 405714388,
+          "stddev_ns": 434489,
+          "avg_ts": 315.493119,
+          "stddev_ts": 0.337914,
+          "samples_ns": [
+            405263056,
+            405750302,
+            406129806
+          ],
+          "samples_ts": [
+            315.844,
+            315.465,
+            315.17
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:40:28Z",
+          "avg_ns": 14379264357,
+          "stddev_ns": 5314077,
+          "avg_ts": 35.606832,
+          "stddev_ts": 0.013153,
+          "samples_ns": [
+            14376276292,
+            14376118493,
+            14385398287
+          ],
+          "samples_ts": [
+            35.6142,
+            35.6146,
+            35.5916
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 225
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:41:32.068755+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:41:12Z\",\n    \"avg_ns\": 2136408297,\n    \"stddev_ns\": 5607771,\n    \"avg_ts\": 239.655666,\n    \"stddev_ts\": 0.629799,\n    \"samples_ns\": [ 2130069731, 2140722028, 2138433134 ],\n    \"samples_ts\": [ 240.368, 239.172, 239.428 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:41:21Z\",\n    \"avg_ns\": 3572998207,\n    \"stddev_ns\": 1448703,\n    \"avg_ts\": 35.824259,\n    \"stddev_ts\": 0.014513,\n    \"samples_ns\": [ 3573075137, 3574405678, 3571513807 ],\n    \"samples_ts\": [ 35.8235, 35.8101, 35.8391 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:41:12Z",
+          "avg_ns": 2136408297,
+          "stddev_ns": 5607771,
+          "avg_ts": 239.655666,
+          "stddev_ts": 0.629799,
+          "samples_ns": [
+            2130069731,
+            2140722028,
+            2138433134
+          ],
+          "samples_ts": [
+            240.368,
+            239.172,
+            239.428
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:41:21Z",
+          "avg_ns": 3572998207,
+          "stddev_ns": 1448703,
+          "avg_ts": 35.824259,
+          "stddev_ts": 0.014513,
+          "samples_ns": [
+            3573075137,
+            3574405678,
+            3571513807
+          ],
+          "samples_ts": [
+            35.8235,
+            35.8101,
+            35.8391
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 226
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:42:25.066217+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:41:32Z\",\n    \"avg_ns\": 2163684883,\n    \"stddev_ns\": 502305,\n    \"avg_ts\": 236.633357,\n    \"stddev_ts\": 0.054942,\n    \"samples_ns\": [ 2163976895, 2163104875, 2163972879 ],\n    \"samples_ts\": [ 236.601, 236.697, 236.602 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:41:41Z\",\n    \"avg_ns\": 14499326667,\n    \"stddev_ns\": 6195139,\n    \"avg_ts\": 35.311989,\n    \"stddev_ts\": 0.015084,\n    \"samples_ns\": [ 14492525697, 14500813818, 14504640488 ],\n    \"samples_ts\": [ 35.3286, 35.3084, 35.299 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:41:32Z",
+          "avg_ns": 2163684883,
+          "stddev_ns": 502305,
+          "avg_ts": 236.633357,
+          "stddev_ts": 0.054942,
+          "samples_ns": [
+            2163976895,
+            2163104875,
+            2163972879
+          ],
+          "samples_ts": [
+            236.601,
+            236.697,
+            236.602
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:41:41Z",
+          "avg_ns": 14499326667,
+          "stddev_ns": 6195139,
+          "avg_ts": 35.311989,
+          "stddev_ts": 0.015084,
+          "samples_ns": [
+            14492525697,
+            14500813818,
+            14504640488
+          ],
+          "samples_ts": [
+            35.3286,
+            35.3084,
+            35.299
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 227
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:42:38.198952+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:42:25Z\",\n    \"avg_ns\": 401271337,\n    \"stddev_ns\": 1108327,\n    \"avg_ts\": 318.987774,\n    \"stddev_ts\": 0.880287,\n    \"samples_ns\": [ 400307287, 401024427, 402482297 ],\n    \"samples_ts\": [ 319.754, 319.183, 318.026 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:42:27Z\",\n    \"avg_ns\": 3564152677,\n    \"stddev_ns\": 1539669,\n    \"avg_ts\": 35.913169,\n    \"stddev_ts\": 0.015495,\n    \"samples_ns\": [ 3562377615, 3565058079, 3565022339 ],\n    \"samples_ts\": [ 35.9311, 35.904, 35.9044 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:42:25Z",
+          "avg_ns": 401271337,
+          "stddev_ns": 1108327,
+          "avg_ts": 318.987774,
+          "stddev_ts": 0.880287,
+          "samples_ns": [
+            400307287,
+            401024427,
+            402482297
+          ],
+          "samples_ts": [
+            319.754,
+            319.183,
+            318.026
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:42:27Z",
+          "avg_ns": 3564152677,
+          "stddev_ns": 1539669,
+          "avg_ts": 35.913169,
+          "stddev_ts": 0.015495,
+          "samples_ns": [
+            3562377615,
+            3565058079,
+            3565022339
+          ],
+          "samples_ts": [
+            35.9311,
+            35.904,
+            35.9044
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 228
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:43:23.660662+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:42:38Z\",\n    \"avg_ns\": 401389941,\n    \"stddev_ns\": 581833,\n    \"avg_ts\": 318.892344,\n    \"stddev_ts\": 0.462040,\n    \"samples_ns\": [ 400790694, 401427257, 401951873 ],\n    \"samples_ts\": [ 319.369, 318.862, 318.446 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:42:40Z\",\n    \"avg_ns\": 14341345141,\n    \"stddev_ns\": 7939206,\n    \"avg_ts\": 35.700982,\n    \"stddev_ts\": 0.019764,\n    \"samples_ns\": [ 14332448026, 14347699825, 14343887574 ],\n    \"samples_ts\": [ 35.7231, 35.6852, 35.6946 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:42:38Z",
+          "avg_ns": 401389941,
+          "stddev_ns": 581833,
+          "avg_ts": 318.892344,
+          "stddev_ts": 0.46204,
+          "samples_ns": [
+            400790694,
+            401427257,
+            401951873
+          ],
+          "samples_ts": [
+            319.369,
+            318.862,
+            318.446
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:42:40Z",
+          "avg_ns": 14341345141,
+          "stddev_ns": 7939206,
+          "avg_ts": 35.700982,
+          "stddev_ts": 0.019764,
+          "samples_ns": [
+            14332448026,
+            14347699825,
+            14343887574
+          ],
+          "samples_ts": [
+            35.7231,
+            35.6852,
+            35.6946
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 229
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:43:41.874311+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:43:24Z\",\n    \"avg_ns\": 1693338703,\n    \"stddev_ns\": 2357456,\n    \"avg_ts\": 302.361636,\n    \"stddev_ts\": 0.420907,\n    \"samples_ns\": [ 1693404294, 1695662321, 1690949495 ],\n    \"samples_ts\": [ 302.35, 301.947, 302.788 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:43:31Z\",\n    \"avg_ns\": 3535366903,\n    \"stddev_ns\": 662579,\n    \"avg_ts\": 36.205578,\n    \"stddev_ts\": 0.006785,\n    \"samples_ns\": [ 3535355881, 3536034925, 3534709903 ],\n    \"samples_ts\": [ 36.2057, 36.1987, 36.2123 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:43:24Z",
+          "avg_ns": 1693338703,
+          "stddev_ns": 2357456,
+          "avg_ts": 302.361636,
+          "stddev_ts": 0.420907,
+          "samples_ns": [
+            1693404294,
+            1695662321,
+            1690949495
+          ],
+          "samples_ts": [
+            302.35,
+            301.947,
+            302.788
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:43:31Z",
+          "avg_ns": 3535366903,
+          "stddev_ns": 662579,
+          "avg_ts": 36.205578,
+          "stddev_ts": 0.006785,
+          "samples_ns": [
+            3535355881,
+            3536034925,
+            3534709903
+          ],
+          "samples_ts": [
+            36.2057,
+            36.1987,
+            36.2123
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 230
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:44:32.318378+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:43:42Z\",\n    \"avg_ns\": 1657490929,\n    \"stddev_ns\": 833021,\n    \"avg_ts\": 308.900687,\n    \"stddev_ts\": 0.155254,\n    \"samples_ns\": [ 1657535852, 1656636355, 1658300580 ],\n    \"samples_ts\": [ 308.892, 309.06, 308.75 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:43:49Z\",\n    \"avg_ns\": 14324323591,\n    \"stddev_ns\": 17941935,\n    \"avg_ts\": 35.743435,\n    \"stddev_ts\": 0.044738,\n    \"samples_ns\": [ 14345007857, 14314991205, 14312971712 ],\n    \"samples_ts\": [ 35.6919, 35.7667, 35.7717 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:43:42Z",
+          "avg_ns": 1657490929,
+          "stddev_ns": 833021,
+          "avg_ts": 308.900687,
+          "stddev_ts": 0.155254,
+          "samples_ns": [
+            1657535852,
+            1656636355,
+            1658300580
+          ],
+          "samples_ts": [
+            308.892,
+            309.06,
+            308.75
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:43:49Z",
+          "avg_ns": 14324323591,
+          "stddev_ns": 17941935,
+          "avg_ts": 35.743435,
+          "stddev_ts": 0.044738,
+          "samples_ns": [
+            14345007857,
+            14314991205,
+            14312971712
+          ],
+          "samples_ts": [
+            35.6919,
+            35.7667,
+            35.7717
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 231
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:44:45.344246+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:44:33Z\",\n    \"avg_ns\": 401437615,\n    \"stddev_ns\": 184622,\n    \"avg_ts\": 318.854070,\n    \"stddev_ts\": 0.144881,\n    \"samples_ns\": [ 401638854, 401390942, 401283051 ],\n    \"samples_ts\": [ 318.694, 318.891, 318.977 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:44:34Z\",\n    \"avg_ns\": 3529872060,\n    \"stddev_ns\": 2273580,\n    \"avg_ts\": 36.261948,\n    \"stddev_ts\": 0.023339,\n    \"samples_ns\": [ 3532208305, 3527670217, 3529737660 ],\n    \"samples_ts\": [ 36.238, 36.2846, 36.2633 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:44:33Z",
+          "avg_ns": 401437615,
+          "stddev_ns": 184622,
+          "avg_ts": 318.85407,
+          "stddev_ts": 0.144881,
+          "samples_ns": [
+            401638854,
+            401390942,
+            401283051
+          ],
+          "samples_ts": [
+            318.694,
+            318.891,
+            318.977
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:44:34Z",
+          "avg_ns": 3529872060,
+          "stddev_ns": 2273580,
+          "avg_ts": 36.261948,
+          "stddev_ts": 0.023339,
+          "samples_ns": [
+            3532208305,
+            3527670217,
+            3529737660
+          ],
+          "samples_ts": [
+            36.238,
+            36.2846,
+            36.2633
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 232
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:45:31.372307+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:44:46Z\",\n    \"avg_ns\": 401672490,\n    \"stddev_ns\": 639974,\n    \"avg_ts\": 318.668117,\n    \"stddev_ts\": 0.507298,\n    \"samples_ns\": [ 402355050, 401575649, 401086772 ],\n    \"samples_ts\": [ 318.127, 318.744, 319.133 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:44:47Z\",\n    \"avg_ns\": 14528767625,\n    \"stddev_ns\": 3883183,\n    \"avg_ts\": 35.240430,\n    \"stddev_ts\": 0.009415,\n    \"samples_ns\": [ 14529120472, 14524721937, 14532460467 ],\n    \"samples_ts\": [ 35.2396, 35.2502, 35.2315 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:44:46Z",
+          "avg_ns": 401672490,
+          "stddev_ns": 639974,
+          "avg_ts": 318.668117,
+          "stddev_ts": 0.507298,
+          "samples_ns": [
+            402355050,
+            401575649,
+            401086772
+          ],
+          "samples_ts": [
+            318.127,
+            318.744,
+            319.133
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:44:47Z",
+          "avg_ns": 14528767625,
+          "stddev_ns": 3883183,
+          "avg_ts": 35.24043,
+          "stddev_ts": 0.009415,
+          "samples_ns": [
+            14529120472,
+            14524721937,
+            14532460467
+          ],
+          "samples_ts": [
+            35.2396,
+            35.2502,
+            35.2315
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 233
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:45:49.878258+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:45:32Z\",\n    \"avg_ns\": 1762214829,\n    \"stddev_ns\": 1440271,\n    \"avg_ts\": 290.543593,\n    \"stddev_ts\": 0.237330,\n    \"samples_ns\": [ 1762578705, 1763437012, 1760628772 ],\n    \"samples_ts\": [ 290.483, 290.342, 290.805 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:45:39Z\",\n    \"avg_ns\": 3552362399,\n    \"stddev_ns\": 1718225,\n    \"avg_ts\": 36.032365,\n    \"stddev_ts\": 0.017403,\n    \"samples_ns\": [ 3554301833, 3551745021, 3551040345 ],\n    \"samples_ts\": [ 36.0127, 36.0386, 36.0458 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:45:32Z",
+          "avg_ns": 1762214829,
+          "stddev_ns": 1440271,
+          "avg_ts": 290.543593,
+          "stddev_ts": 0.23733,
+          "samples_ns": [
+            1762578705,
+            1763437012,
+            1760628772
+          ],
+          "samples_ts": [
+            290.483,
+            290.342,
+            290.805
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:45:39Z",
+          "avg_ns": 3552362399,
+          "stddev_ns": 1718225,
+          "avg_ts": 36.032365,
+          "stddev_ts": 0.017403,
+          "samples_ns": [
+            3554301833,
+            3551745021,
+            3551040345
+          ],
+          "samples_ts": [
+            36.0127,
+            36.0386,
+            36.0458
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 234
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:46:41.828204+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:45:50Z\",\n    \"avg_ns\": 1731208044,\n    \"stddev_ns\": 1012718,\n    \"avg_ts\": 295.747307,\n    \"stddev_ts\": 0.172844,\n    \"samples_ns\": [ 1731102481, 1732268551, 1730253101 ],\n    \"samples_ts\": [ 295.765, 295.566, 295.91 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:45:57Z\",\n    \"avg_ns\": 14714337904,\n    \"stddev_ns\": 3345273,\n    \"avg_ts\": 34.795994,\n    \"stddev_ts\": 0.007906,\n    \"samples_ns\": [ 14715496303, 14710569704, 14716947706 ],\n    \"samples_ts\": [ 34.7933, 34.8049, 34.7898 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:45:50Z",
+          "avg_ns": 1731208044,
+          "stddev_ns": 1012718,
+          "avg_ts": 295.747307,
+          "stddev_ts": 0.172844,
+          "samples_ns": [
+            1731102481,
+            1732268551,
+            1730253101
+          ],
+          "samples_ts": [
+            295.765,
+            295.566,
+            295.91
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:45:57Z",
+          "avg_ns": 14714337904,
+          "stddev_ns": 3345273,
+          "avg_ts": 34.795994,
+          "stddev_ts": 0.007906,
+          "samples_ns": [
+            14715496303,
+            14710569704,
+            14716947706
+          ],
+          "samples_ts": [
+            34.7933,
+            34.8049,
+            34.7898
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 235
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:46:54.961130+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:46:42Z\",\n    \"avg_ns\": 400649470,\n    \"stddev_ns\": 399343,\n    \"avg_ts\": 319.481476,\n    \"stddev_ts\": 0.317773,\n    \"samples_ns\": [ 400970290, 400203614, 400774508 ],\n    \"samples_ts\": [ 319.226, 319.837, 319.382 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:46:44Z\",\n    \"avg_ns\": 3562815161,\n    \"stddev_ns\": 500708,\n    \"avg_ts\": 35.926647,\n    \"stddev_ts\": 0.005013,\n    \"samples_ns\": [ 3562292499, 3563282074, 3562870911 ],\n    \"samples_ts\": [ 35.9319, 35.9219, 35.9261 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:46:42Z",
+          "avg_ns": 400649470,
+          "stddev_ns": 399343,
+          "avg_ts": 319.481476,
+          "stddev_ts": 0.317773,
+          "samples_ns": [
+            400970290,
+            400203614,
+            400774508
+          ],
+          "samples_ts": [
+            319.226,
+            319.837,
+            319.382
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:46:44Z",
+          "avg_ns": 3562815161,
+          "stddev_ns": 500708,
+          "avg_ts": 35.926647,
+          "stddev_ts": 0.005013,
+          "samples_ns": [
+            3562292499,
+            3563282074,
+            3562870911
+          ],
+          "samples_ts": [
+            35.9319,
+            35.9219,
+            35.9261
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 236
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:47:40.849616+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:46:55Z\",\n    \"avg_ns\": 401136603,\n    \"stddev_ns\": 659986,\n    \"avg_ts\": 319.093870,\n    \"stddev_ts\": 0.524673,\n    \"samples_ns\": [ 400954984, 400586442, 401868383 ],\n    \"samples_ts\": [ 319.238, 319.532, 318.512 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:46:57Z\",\n    \"avg_ns\": 14475227727,\n    \"stddev_ns\": 2370802,\n    \"avg_ts\": 35.370774,\n    \"stddev_ts\": 0.005785,\n    \"samples_ns\": [ 14474583059, 14477851046, 14473249077 ],\n    \"samples_ts\": [ 35.3723, 35.3644, 35.3756 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:46:55Z",
+          "avg_ns": 401136603,
+          "stddev_ns": 659986,
+          "avg_ts": 319.09387,
+          "stddev_ts": 0.524673,
+          "samples_ns": [
+            400954984,
+            400586442,
+            401868383
+          ],
+          "samples_ts": [
+            319.238,
+            319.532,
+            318.512
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:46:57Z",
+          "avg_ns": 14475227727,
+          "stddev_ns": 2370802,
+          "avg_ts": 35.370774,
+          "stddev_ts": 0.005785,
+          "samples_ns": [
+            14474583059,
+            14477851046,
+            14473249077
+          ],
+          "samples_ts": [
+            35.3723,
+            35.3644,
+            35.3756
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 237
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:48:00.292365+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:47:41Z\",\n    \"avg_ns\": 2002254647,\n    \"stddev_ns\": 14240713,\n    \"avg_ts\": 255.720389,\n    \"stddev_ts\": 1.826259,\n    \"samples_ns\": [ 1985811325, 2010382622, 2010569995 ],\n    \"samples_ts\": [ 257.829, 254.678, 254.654 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:47:49Z\",\n    \"avg_ns\": 3537087592,\n    \"stddev_ns\": 4654854,\n    \"avg_ts\": 36.188006,\n    \"stddev_ts\": 0.047590,\n    \"samples_ns\": [ 3535371223, 3533535030, 3542356524 ],\n    \"samples_ts\": [ 36.2055, 36.2243, 36.1341 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:47:41Z",
+          "avg_ns": 2002254647,
+          "stddev_ns": 14240713,
+          "avg_ts": 255.720389,
+          "stddev_ts": 1.826259,
+          "samples_ns": [
+            1985811325,
+            2010382622,
+            2010569995
+          ],
+          "samples_ts": [
+            257.829,
+            254.678,
+            254.654
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:47:49Z",
+          "avg_ns": 3537087592,
+          "stddev_ns": 4654854,
+          "avg_ts": 36.188006,
+          "stddev_ts": 0.04759,
+          "samples_ns": [
+            3535371223,
+            3533535030,
+            3542356524
+          ],
+          "samples_ts": [
+            36.2055,
+            36.2243,
+            36.1341
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 238
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:48:52.736484+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:48:01Z\",\n    \"avg_ns\": 2019492322,\n    \"stddev_ns\": 1364870,\n    \"avg_ts\": 253.529142,\n    \"stddev_ts\": 0.171321,\n    \"samples_ns\": [ 2020311486, 2020247893, 2017917588 ],\n    \"samples_ts\": [ 253.426, 253.434, 253.727 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:48:09Z\",\n    \"avg_ns\": 14507081266,\n    \"stddev_ns\": 9590742,\n    \"avg_ts\": 35.293119,\n    \"stddev_ts\": 0.023336,\n    \"samples_ns\": [ 14496472692, 14509635746, 14515135361 ],\n    \"samples_ts\": [ 35.3189, 35.2869, 35.2735 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:48:01Z",
+          "avg_ns": 2019492322,
+          "stddev_ns": 1364870,
+          "avg_ts": 253.529142,
+          "stddev_ts": 0.171321,
+          "samples_ns": [
+            2020311486,
+            2020247893,
+            2017917588
+          ],
+          "samples_ts": [
+            253.426,
+            253.434,
+            253.727
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:48:09Z",
+          "avg_ns": 14507081266,
+          "stddev_ns": 9590742,
+          "avg_ts": 35.293119,
+          "stddev_ts": 0.023336,
+          "samples_ns": [
+            14496472692,
+            14509635746,
+            14515135361
+          ],
+          "samples_ts": [
+            35.3189,
+            35.2869,
+            35.2735
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 239
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:49:05.854914+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:48:53Z\",\n    \"avg_ns\": 401381949,\n    \"stddev_ns\": 522136,\n    \"avg_ts\": 318.898606,\n    \"stddev_ts\": 0.414533,\n    \"samples_ns\": [ 401651811, 401713071, 400780967 ],\n    \"samples_ts\": [ 318.684, 318.635, 319.376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:48:55Z\",\n    \"avg_ns\": 3561530794,\n    \"stddev_ns\": 2007078,\n    \"avg_ts\": 35.939610,\n    \"stddev_ts\": 0.020248,\n    \"samples_ns\": [ 3559344400, 3563286988, 3561960995 ],\n    \"samples_ts\": [ 35.9617, 35.9219, 35.9353 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:48:53Z",
+          "avg_ns": 401381949,
+          "stddev_ns": 522136,
+          "avg_ts": 318.898606,
+          "stddev_ts": 0.414533,
+          "samples_ns": [
+            401651811,
+            401713071,
+            400780967
+          ],
+          "samples_ts": [
+            318.684,
+            318.635,
+            319.376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:48:55Z",
+          "avg_ns": 3561530794,
+          "stddev_ns": 2007078,
+          "avg_ts": 35.93961,
+          "stddev_ts": 0.020248,
+          "samples_ns": [
+            3559344400,
+            3563286988,
+            3561960995
+          ],
+          "samples_ts": [
+            35.9617,
+            35.9219,
+            35.9353
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 240
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:49:51.732685+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:49:06Z\",\n    \"avg_ns\": 401357507,\n    \"stddev_ns\": 361469,\n    \"avg_ts\": 318.917838,\n    \"stddev_ts\": 0.286379,\n    \"samples_ns\": [ 401394156, 400980226, 401698141 ],\n    \"samples_ts\": [ 318.889, 319.218, 318.647 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:49:08Z\",\n    \"avg_ns\": 14477614561,\n    \"stddev_ns\": 13250788,\n    \"avg_ts\": 35.364962,\n    \"stddev_ts\": 0.032350,\n    \"samples_ns\": [ 14492904270, 14470457063, 14469482351 ],\n    \"samples_ts\": [ 35.3276, 35.3824, 35.3848 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:49:06Z",
+          "avg_ns": 401357507,
+          "stddev_ns": 361469,
+          "avg_ts": 318.917838,
+          "stddev_ts": 0.286379,
+          "samples_ns": [
+            401394156,
+            400980226,
+            401698141
+          ],
+          "samples_ts": [
+            318.889,
+            319.218,
+            318.647
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:49:08Z",
+          "avg_ns": 14477614561,
+          "stddev_ns": 13250788,
+          "avg_ts": 35.364962,
+          "stddev_ts": 0.03235,
+          "samples_ns": [
+            14492904270,
+            14470457063,
+            14469482351
+          ],
+          "samples_ts": [
+            35.3276,
+            35.3824,
+            35.3848
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 241
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:50:09.899006+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:49:52Z\",\n    \"avg_ns\": 1651162876,\n    \"stddev_ns\": 1465153,\n    \"avg_ts\": 310.084654,\n    \"stddev_ts\": 0.275122,\n    \"samples_ns\": [ 1649565676, 1652442866, 1651480087 ],\n    \"samples_ts\": [ 310.385, 309.844, 310.025 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:49:59Z\",\n    \"avg_ns\": 3574144513,\n    \"stddev_ns\": 3847290,\n    \"avg_ts\": 35.812793,\n    \"stddev_ts\": 0.038568,\n    \"samples_ns\": [ 3569822089, 3577194055, 3575417395 ],\n    \"samples_ts\": [ 35.8561, 35.7822, 35.8 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:49:52Z",
+          "avg_ns": 1651162876,
+          "stddev_ns": 1465153,
+          "avg_ts": 310.084654,
+          "stddev_ts": 0.275122,
+          "samples_ns": [
+            1649565676,
+            1652442866,
+            1651480087
+          ],
+          "samples_ts": [
+            310.385,
+            309.844,
+            310.025
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:49:59Z",
+          "avg_ns": 3574144513,
+          "stddev_ns": 3847290,
+          "avg_ts": 35.812793,
+          "stddev_ts": 0.038568,
+          "samples_ns": [
+            3569822089,
+            3577194055,
+            3575417395
+          ],
+          "samples_ts": [
+            35.8561,
+            35.7822,
+            35.8
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 242
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:51:01.054672+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:50:10Z\",\n    \"avg_ns\": 1670409952,\n    \"stddev_ns\": 1546610,\n    \"avg_ts\": 306.511759,\n    \"stddev_ts\": 0.283835,\n    \"samples_ns\": [ 1671503876, 1671084806, 1668641175 ],\n    \"samples_ts\": [ 306.311, 306.388, 306.836 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:50:17Z\",\n    \"avg_ns\": 14543187261,\n    \"stddev_ns\": 15615606,\n    \"avg_ts\": 35.205515,\n    \"stddev_ts\": 0.037777,\n    \"samples_ns\": [ 14561214168, 14534498790, 14533848826 ],\n    \"samples_ts\": [ 35.1619, 35.2265, 35.2281 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:50:10Z",
+          "avg_ns": 1670409952,
+          "stddev_ns": 1546610,
+          "avg_ts": 306.511759,
+          "stddev_ts": 0.283835,
+          "samples_ns": [
+            1671503876,
+            1671084806,
+            1668641175
+          ],
+          "samples_ts": [
+            306.311,
+            306.388,
+            306.836
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:50:17Z",
+          "avg_ns": 14543187261,
+          "stddev_ns": 15615606,
+          "avg_ts": 35.205515,
+          "stddev_ts": 0.037777,
+          "samples_ns": [
+            14561214168,
+            14534498790,
+            14533848826
+          ],
+          "samples_ts": [
+            35.1619,
+            35.2265,
+            35.2281
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 243
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:51:14.173606+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:51:01Z\",\n    \"avg_ns\": 400845731,\n    \"stddev_ns\": 761307,\n    \"avg_ts\": 319.325610,\n    \"stddev_ts\": 0.605824,\n    \"samples_ns\": [ 400448239, 400365442, 401723512 ],\n    \"samples_ts\": [ 319.642, 319.708, 318.627 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:51:03Z\",\n    \"avg_ns\": 3557357116,\n    \"stddev_ns\": 927598,\n    \"avg_ts\": 35.981770,\n    \"stddev_ts\": 0.009362,\n    \"samples_ns\": [ 3558411070, 3556675972, 3556984307 ],\n    \"samples_ts\": [ 35.9711, 35.9887, 35.9855 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:51:01Z",
+          "avg_ns": 400845731,
+          "stddev_ns": 761307,
+          "avg_ts": 319.32561,
+          "stddev_ts": 0.605824,
+          "samples_ns": [
+            400448239,
+            400365442,
+            401723512
+          ],
+          "samples_ts": [
+            319.642,
+            319.708,
+            318.627
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:51:03Z",
+          "avg_ns": 3557357116,
+          "stddev_ns": 927598,
+          "avg_ts": 35.98177,
+          "stddev_ts": 0.009362,
+          "samples_ns": [
+            3558411070,
+            3556675972,
+            3556984307
+          ],
+          "samples_ts": [
+            35.9711,
+            35.9887,
+            35.9855
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 244
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:52:00.046255+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:51:14Z\",\n    \"avg_ns\": 399856629,\n    \"stddev_ns\": 558410,\n    \"avg_ts\": 320.115153,\n    \"stddev_ts\": 0.446772,\n    \"samples_ns\": [ 399295969, 399861883, 400412036 ],\n    \"samples_ts\": [ 320.564, 320.111, 319.671 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:51:16Z\",\n    \"avg_ns\": 14477994291,\n    \"stddev_ns\": 8171160,\n    \"avg_ts\": 35.364022,\n    \"stddev_ts\": 0.019953,\n    \"samples_ns\": [ 14474833391, 14487273728, 14471875754 ],\n    \"samples_ts\": [ 35.3717, 35.3414, 35.379 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:51:14Z",
+          "avg_ns": 399856629,
+          "stddev_ns": 558410,
+          "avg_ts": 320.115153,
+          "stddev_ts": 0.446772,
+          "samples_ns": [
+            399295969,
+            399861883,
+            400412036
+          ],
+          "samples_ts": [
+            320.564,
+            320.111,
+            319.671
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:51:16Z",
+          "avg_ns": 14477994291,
+          "stddev_ns": 8171160,
+          "avg_ts": 35.364022,
+          "stddev_ts": 0.019953,
+          "samples_ns": [
+            14474833391,
+            14487273728,
+            14471875754
+          ],
+          "samples_ts": [
+            35.3717,
+            35.3414,
+            35.379
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 245
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:52:18.291669+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:52:00Z\",\n    \"avg_ns\": 1688604178,\n    \"stddev_ns\": 160158,\n    \"avg_ts\": 303.209011,\n    \"stddev_ts\": 0.028759,\n    \"samples_ns\": [ 1688751021, 1688433398, 1688628115 ],\n    \"samples_ts\": [ 303.183, 303.24, 303.205 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:52:07Z\",\n    \"avg_ns\": 3535222407,\n    \"stddev_ns\": 608021,\n    \"avg_ts\": 36.207058,\n    \"stddev_ts\": 0.006167,\n    \"samples_ns\": [ 3535911413, 3534796829, 3534958981 ],\n    \"samples_ts\": [ 36.2, 36.2114, 36.2098 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:52:00Z",
+          "avg_ns": 1688604178,
+          "stddev_ns": 160158,
+          "avg_ts": 303.209011,
+          "stddev_ts": 0.028759,
+          "samples_ns": [
+            1688751021,
+            1688433398,
+            1688628115
+          ],
+          "samples_ts": [
+            303.183,
+            303.24,
+            303.205
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:52:07Z",
+          "avg_ns": 3535222407,
+          "stddev_ns": 608021,
+          "avg_ts": 36.207058,
+          "stddev_ts": 0.006167,
+          "samples_ns": [
+            3535911413,
+            3534796829,
+            3534958981
+          ],
+          "samples_ts": [
+            36.2,
+            36.2114,
+            36.2098
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 246
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:53:09.501396+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:52:19Z\",\n    \"avg_ns\": 1689042677,\n    \"stddev_ns\": 1647668,\n    \"avg_ts\": 303.130484,\n    \"stddev_ts\": 0.295548,\n    \"samples_ns\": [ 1690803032, 1688786187, 1687538813 ],\n    \"samples_ts\": [ 302.815, 303.176, 303.4 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:52:25Z\",\n    \"avg_ns\": 14532976369,\n    \"stddev_ns\": 2342285,\n    \"avg_ts\": 35.230224,\n    \"stddev_ts\": 0.005671,\n    \"samples_ns\": [ 14532996744, 14530627067, 14535305297 ],\n    \"samples_ts\": [ 35.2302, 35.2359, 35.2246 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:52:19Z",
+          "avg_ns": 1689042677,
+          "stddev_ns": 1647668,
+          "avg_ts": 303.130484,
+          "stddev_ts": 0.295548,
+          "samples_ns": [
+            1690803032,
+            1688786187,
+            1687538813
+          ],
+          "samples_ts": [
+            302.815,
+            303.176,
+            303.4
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:52:25Z",
+          "avg_ns": 14532976369,
+          "stddev_ns": 2342285,
+          "avg_ts": 35.230224,
+          "stddev_ts": 0.005671,
+          "samples_ns": [
+            14532996744,
+            14530627067,
+            14535305297
+          ],
+          "samples_ts": [
+            35.2302,
+            35.2359,
+            35.2246
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 247
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:53:22.618477+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:53:10Z\",\n    \"avg_ns\": 399855352,\n    \"stddev_ns\": 247578,\n    \"avg_ts\": 320.115841,\n    \"stddev_ts\": 0.197605,\n    \"samples_ns\": [ 399922514, 399581954, 400061589 ],\n    \"samples_ts\": [ 320.062, 320.335, 319.951 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:53:11Z\",\n    \"avg_ns\": 3560851759,\n    \"stddev_ns\": 1948218,\n    \"avg_ts\": 35.946463,\n    \"stddev_ts\": 0.019663,\n    \"samples_ns\": [ 3561665392, 3562260293, 3558629593 ],\n    \"samples_ts\": [ 35.9382, 35.9322, 35.9689 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:53:10Z",
+          "avg_ns": 399855352,
+          "stddev_ns": 247578,
+          "avg_ts": 320.115841,
+          "stddev_ts": 0.197605,
+          "samples_ns": [
+            399922514,
+            399581954,
+            400061589
+          ],
+          "samples_ts": [
+            320.062,
+            320.335,
+            319.951
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:53:11Z",
+          "avg_ns": 3560851759,
+          "stddev_ns": 1948218,
+          "avg_ts": 35.946463,
+          "stddev_ts": 0.019663,
+          "samples_ns": [
+            3561665392,
+            3562260293,
+            3558629593
+          ],
+          "samples_ts": [
+            35.9382,
+            35.9322,
+            35.9689
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 248
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:54:08.616213+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:53:23Z\",\n    \"avg_ns\": 400592440,\n    \"stddev_ns\": 408123,\n    \"avg_ts\": 319.526969,\n    \"stddev_ts\": 0.324963,\n    \"samples_ns\": [ 401059847, 400406821, 400310653 ],\n    \"samples_ts\": [ 319.154, 319.675, 319.752 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:53:24Z\",\n    \"avg_ns\": 14518082103,\n    \"stddev_ns\": 2422018,\n    \"avg_ts\": 35.266367,\n    \"stddev_ts\": 0.005876,\n    \"samples_ns\": [ 14516732445, 14520874826, 14516639039 ],\n    \"samples_ts\": [ 35.2696, 35.2596, 35.2699 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:53:23Z",
+          "avg_ns": 400592440,
+          "stddev_ns": 408123,
+          "avg_ts": 319.526969,
+          "stddev_ts": 0.324963,
+          "samples_ns": [
+            401059847,
+            400406821,
+            400310653
+          ],
+          "samples_ts": [
+            319.154,
+            319.675,
+            319.752
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:53:24Z",
+          "avg_ns": 14518082103,
+          "stddev_ns": 2422018,
+          "avg_ts": 35.266367,
+          "stddev_ts": 0.005876,
+          "samples_ns": [
+            14516732445,
+            14520874826,
+            14516639039
+          ],
+          "samples_ts": [
+            35.2696,
+            35.2596,
+            35.2699
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 249
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:54:28.757204+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:54:09Z\",\n    \"avg_ns\": 2153541676,\n    \"stddev_ns\": 261927,\n    \"avg_ts\": 237.747897,\n    \"stddev_ts\": 0.028915,\n    \"samples_ns\": [ 2153830152, 2153318748, 2153476128 ],\n    \"samples_ts\": [ 237.716, 237.773, 237.755 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:54:17Z\",\n    \"avg_ns\": 3560145331,\n    \"stddev_ns\": 1390845,\n    \"avg_ts\": 35.953592,\n    \"stddev_ts\": 0.014033,\n    \"samples_ns\": [ 3560055605, 3558802804, 3561577585 ],\n    \"samples_ts\": [ 35.9545, 35.9672, 35.9391 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:54:09Z",
+          "avg_ns": 2153541676,
+          "stddev_ns": 261927,
+          "avg_ts": 237.747897,
+          "stddev_ts": 0.028915,
+          "samples_ns": [
+            2153830152,
+            2153318748,
+            2153476128
+          ],
+          "samples_ts": [
+            237.716,
+            237.773,
+            237.755
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:54:17Z",
+          "avg_ns": 3560145331,
+          "stddev_ns": 1390845,
+          "avg_ts": 35.953592,
+          "stddev_ts": 0.014033,
+          "samples_ns": [
+            3560055605,
+            3558802804,
+            3561577585
+          ],
+          "samples_ts": [
+            35.9545,
+            35.9672,
+            35.9391
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 250
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:55:21.311020+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:54:29Z\",\n    \"avg_ns\": 2046334305,\n    \"stddev_ns\": 1028246,\n    \"avg_ts\": 250.203539,\n    \"stddev_ts\": 0.125468,\n    \"samples_ns\": [ 2045373761, 2047415589, 2046213567 ],\n    \"samples_ts\": [ 250.321, 250.071, 250.218 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:54:37Z\",\n    \"avg_ns\": 14516411442,\n    \"stddev_ns\": 1285940,\n    \"avg_ts\": 35.270425,\n    \"stddev_ts\": 0.003125,\n    \"samples_ns\": [ 14514927872, 14517207150, 14517099304 ],\n    \"samples_ts\": [ 35.274, 35.2685, 35.2688 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:54:29Z",
+          "avg_ns": 2046334305,
+          "stddev_ns": 1028246,
+          "avg_ts": 250.203539,
+          "stddev_ts": 0.125468,
+          "samples_ns": [
+            2045373761,
+            2047415589,
+            2046213567
+          ],
+          "samples_ts": [
+            250.321,
+            250.071,
+            250.218
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:54:37Z",
+          "avg_ns": 14516411442,
+          "stddev_ns": 1285940,
+          "avg_ts": 35.270425,
+          "stddev_ts": 0.003125,
+          "samples_ns": [
+            14514927872,
+            14517207150,
+            14517099304
+          ],
+          "samples_ts": [
+            35.274,
+            35.2685,
+            35.2688
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 251
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:55:35.345326+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:55:22Z\",\n    \"avg_ns\": 318581690,\n    \"stddev_ns\": 624327,\n    \"avg_ts\": 401.781807,\n    \"stddev_ts\": 0.786201,\n    \"samples_ns\": [ 318281879, 318164100, 319299092 ],\n    \"samples_ts\": [ 402.159, 402.308, 400.878 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:55:23Z\",\n    \"avg_ns\": 3971277633,\n    \"stddev_ns\": 15270705,\n    \"avg_ts\": 32.231758,\n    \"stddev_ts\": 0.123704,\n    \"samples_ns\": [ 3988623867, 3965347070, 3959861963 ],\n    \"samples_ts\": [ 32.0913, 32.2796, 32.3244 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:55:22Z",
+          "avg_ns": 318581690,
+          "stddev_ns": 624327,
+          "avg_ts": 401.781807,
+          "stddev_ts": 0.786201,
+          "samples_ns": [
+            318281879,
+            318164100,
+            319299092
+          ],
+          "samples_ts": [
+            402.159,
+            402.308,
+            400.878
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:55:23Z",
+          "avg_ns": 3971277633,
+          "stddev_ns": 15270705,
+          "avg_ts": 32.231758,
+          "stddev_ts": 0.123704,
+          "samples_ns": [
+            3988623867,
+            3965347070,
+            3959861963
+          ],
+          "samples_ts": [
+            32.0913,
+            32.2796,
+            32.3244
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 252
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:56:26.254476+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:55:36Z\",\n    \"avg_ns\": 319531297,\n    \"stddev_ns\": 727287,\n    \"avg_ts\": 400.588121,\n    \"stddev_ts\": 0.911515,\n    \"samples_ns\": [ 319468031, 318837709, 320288151 ],\n    \"samples_ts\": [ 400.666, 401.458, 399.64 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:55:37Z\",\n    \"avg_ns\": 16255682881,\n    \"stddev_ns\": 12822848,\n    \"avg_ts\": 31.496690,\n    \"stddev_ts\": 0.024840,\n    \"samples_ns\": [ 16253476060, 16269465917, 16244106666 ],\n    \"samples_ts\": [ 31.501, 31.47, 31.5191 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:55:36Z",
+          "avg_ns": 319531297,
+          "stddev_ns": 727287,
+          "avg_ts": 400.588121,
+          "stddev_ts": 0.911515,
+          "samples_ns": [
+            319468031,
+            318837709,
+            320288151
+          ],
+          "samples_ts": [
+            400.666,
+            401.458,
+            399.64
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:55:37Z",
+          "avg_ns": 16255682881,
+          "stddev_ns": 12822848,
+          "avg_ts": 31.49669,
+          "stddev_ts": 0.02484,
+          "samples_ns": [
+            16253476060,
+            16269465917,
+            16244106666
+          ],
+          "samples_ts": [
+            31.501,
+            31.47,
+            31.5191
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 253
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:56:44.367401+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:56:27Z\",\n    \"avg_ns\": 1322339082,\n    \"stddev_ns\": 5207280,\n    \"avg_ts\": 387.196677,\n    \"stddev_ts\": 1.526247,\n    \"samples_ns\": [ 1316738434, 1327033873, 1323244940 ],\n    \"samples_ts\": [ 388.84, 385.823, 386.928 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:56:32Z\",\n    \"avg_ns\": 3978839453,\n    \"stddev_ns\": 13199361,\n    \"avg_ts\": 32.170420,\n    \"stddev_ts\": 0.106526,\n    \"samples_ns\": [ 3969872027, 3993995755, 3972650579 ],\n    \"samples_ts\": [ 32.2429, 32.0481, 32.2203 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:56:27Z",
+          "avg_ns": 1322339082,
+          "stddev_ns": 5207280,
+          "avg_ts": 387.196677,
+          "stddev_ts": 1.526247,
+          "samples_ns": [
+            1316738434,
+            1327033873,
+            1323244940
+          ],
+          "samples_ts": [
+            388.84,
+            385.823,
+            386.928
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:56:32Z",
+          "avg_ns": 3978839453,
+          "stddev_ns": 13199361,
+          "avg_ts": 32.17042,
+          "stddev_ts": 0.106526,
+          "samples_ns": [
+            3969872027,
+            3993995755,
+            3972650579
+          ],
+          "samples_ts": [
+            32.2429,
+            32.0481,
+            32.2203
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 254
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:57:39.191798+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:56:45Z\",\n    \"avg_ns\": 1336839738,\n    \"stddev_ns\": 1870995,\n    \"avg_ts\": 382.993304,\n    \"stddev_ts\": 0.535692,\n    \"samples_ns\": [ 1338878960, 1336436950, 1335203305 ],\n    \"samples_ts\": [ 382.409, 383.108, 383.462 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:56:50Z\",\n    \"avg_ns\": 16199441218,\n    \"stddev_ns\": 24722980,\n    \"avg_ts\": 31.606077,\n    \"stddev_ts\": 0.048212,\n    \"samples_ns\": [ 16226443163, 16193963002, 16177917491 ],\n    \"samples_ts\": [ 31.5534, 31.6167, 31.6481 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:56:45Z",
+          "avg_ns": 1336839738,
+          "stddev_ns": 1870995,
+          "avg_ts": 382.993304,
+          "stddev_ts": 0.535692,
+          "samples_ns": [
+            1338878960,
+            1336436950,
+            1335203305
+          ],
+          "samples_ts": [
+            382.409,
+            383.108,
+            383.462
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:56:50Z",
+          "avg_ns": 16199441218,
+          "stddev_ns": 24722980,
+          "avg_ts": 31.606077,
+          "stddev_ts": 0.048212,
+          "samples_ns": [
+            16226443163,
+            16193963002,
+            16177917491
+          ],
+          "samples_ts": [
+            31.5534,
+            31.6167,
+            31.6481
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 255
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:57:53.276206+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:57:39Z\",\n    \"avg_ns\": 321178872,\n    \"stddev_ns\": 2127469,\n    \"avg_ts\": 398.543442,\n    \"stddev_ts\": 2.632014,\n    \"samples_ns\": [ 319514336, 320446494, 323575786 ],\n    \"samples_ts\": [ 400.608, 399.443, 395.58 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:57:41Z\",\n    \"avg_ns\": 3980463563,\n    \"stddev_ns\": 6803366,\n    \"avg_ts\": 32.157121,\n    \"stddev_ts\": 0.054906,\n    \"samples_ns\": [ 3977205473, 3988282639, 3975902579 ],\n    \"samples_ts\": [ 32.1834, 32.094, 32.1939 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:57:39Z",
+          "avg_ns": 321178872,
+          "stddev_ns": 2127469,
+          "avg_ts": 398.543442,
+          "stddev_ts": 2.632014,
+          "samples_ns": [
+            319514336,
+            320446494,
+            323575786
+          ],
+          "samples_ts": [
+            400.608,
+            399.443,
+            395.58
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:57:41Z",
+          "avg_ns": 3980463563,
+          "stddev_ns": 6803366,
+          "avg_ts": 32.157121,
+          "stddev_ts": 0.054906,
+          "samples_ns": [
+            3977205473,
+            3988282639,
+            3975902579
+          ],
+          "samples_ts": [
+            32.1834,
+            32.094,
+            32.1939
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 256
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:58:44.039882+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:57:54Z\",\n    \"avg_ns\": 319998315,\n    \"stddev_ns\": 929326,\n    \"avg_ts\": 400.004350,\n    \"stddev_ts\": 1.159660,\n    \"samples_ns\": [ 319662145, 319284216, 321048586 ],\n    \"samples_ts\": [ 400.423, 400.897, 398.694 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:57:55Z\",\n    \"avg_ns\": 16198273674,\n    \"stddev_ns\": 2435302,\n    \"avg_ts\": 31.608307,\n    \"stddev_ts\": 0.004752,\n    \"samples_ns\": [ 16200999580, 16196312576, 16197508866 ],\n    \"samples_ts\": [ 31.603, 31.6121, 31.6098 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:57:54Z",
+          "avg_ns": 319998315,
+          "stddev_ns": 929326,
+          "avg_ts": 400.00435,
+          "stddev_ts": 1.15966,
+          "samples_ns": [
+            319662145,
+            319284216,
+            321048586
+          ],
+          "samples_ts": [
+            400.423,
+            400.897,
+            398.694
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:57:55Z",
+          "avg_ns": 16198273674,
+          "stddev_ns": 2435302,
+          "avg_ts": 31.608307,
+          "stddev_ts": 0.004752,
+          "samples_ns": [
+            16200999580,
+            16196312576,
+            16197508866
+          ],
+          "samples_ts": [
+            31.603,
+            31.6121,
+            31.6098
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 257
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:59:02.202527+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:58:44Z\",\n    \"avg_ns\": 1343358129,\n    \"stddev_ns\": 1695310,\n    \"avg_ts\": 381.134809,\n    \"stddev_ts\": 0.480964,\n    \"samples_ns\": [ 1345077643, 1341688105, 1343308639 ],\n    \"samples_ts\": [ 380.647, 381.609, 381.148 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:58:50Z\",\n    \"avg_ns\": 3978951545,\n    \"stddev_ns\": 3710050,\n    \"avg_ts\": 32.169297,\n    \"stddev_ts\": 0.030001,\n    \"samples_ns\": [ 3980460112, 3974725996, 3981668529 ],\n    \"samples_ts\": [ 32.1571, 32.2035, 32.1473 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:58:44Z",
+          "avg_ns": 1343358129,
+          "stddev_ns": 1695310,
+          "avg_ts": 381.134809,
+          "stddev_ts": 0.480964,
+          "samples_ns": [
+            1345077643,
+            1341688105,
+            1343308639
+          ],
+          "samples_ts": [
+            380.647,
+            381.609,
+            381.148
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:58:50Z",
+          "avg_ns": 3978951545,
+          "stddev_ns": 3710050,
+          "avg_ts": 32.169297,
+          "stddev_ts": 0.030001,
+          "samples_ns": [
+            3980460112,
+            3974725996,
+            3981668529
+          ],
+          "samples_ts": [
+            32.1571,
+            32.2035,
+            32.1473
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 258
+    },
+    {
+      "timestamp_utc": "2025-12-08T22:59:57.038504+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:59:02Z\",\n    \"avg_ns\": 1392977289,\n    \"stddev_ns\": 847144,\n    \"avg_ts\": 367.558129,\n    \"stddev_ts\": 0.223250,\n    \"samples_ns\": [ 1393935987, 1392333820, 1392662061 ],\n    \"samples_ts\": [ 367.305, 367.728, 367.641 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:59:08Z\",\n    \"avg_ns\": 16139478942,\n    \"stddev_ns\": 21988395,\n    \"avg_ts\": 31.723492,\n    \"stddev_ts\": 0.043252,\n    \"samples_ns\": [ 16114178059, 16153961379, 16150297390 ],\n    \"samples_ts\": [ 31.7733, 31.695, 31.7022 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:59:02Z",
+          "avg_ns": 1392977289,
+          "stddev_ns": 847144,
+          "avg_ts": 367.558129,
+          "stddev_ts": 0.22325,
+          "samples_ns": [
+            1393935987,
+            1392333820,
+            1392662061
+          ],
+          "samples_ts": [
+            367.305,
+            367.728,
+            367.641
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:59:08Z",
+          "avg_ns": 16139478942,
+          "stddev_ns": 21988395,
+          "avg_ts": 31.723492,
+          "stddev_ts": 0.043252,
+          "samples_ns": [
+            16114178059,
+            16153961379,
+            16150297390
+          ],
+          "samples_ts": [
+            31.7733,
+            31.695,
+            31.7022
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 259
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:00:11.026820+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:59:57Z\",\n    \"avg_ns\": 319612619,\n    \"stddev_ns\": 106955,\n    \"avg_ts\": 400.484841,\n    \"stddev_ts\": 0.130217,\n    \"samples_ns\": [ 319605891, 319512223, 319719745 ],\n    \"samples_ts\": [ 400.493, 400.611, 400.351 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T22:59:59Z\",\n    \"avg_ns\": 3956706387,\n    \"stddev_ns\": 6946431,\n    \"avg_ts\": 32.350205,\n    \"stddev_ts\": 0.056849,\n    \"samples_ns\": [ 3960432764, 3960994178, 3948692220 ],\n    \"samples_ts\": [ 32.3197, 32.3151, 32.4158 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:59:57Z",
+          "avg_ns": 319612619,
+          "stddev_ns": 106955,
+          "avg_ts": 400.484841,
+          "stddev_ts": 0.130217,
+          "samples_ns": [
+            319605891,
+            319512223,
+            319719745
+          ],
+          "samples_ts": [
+            400.493,
+            400.611,
+            400.351
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T22:59:59Z",
+          "avg_ns": 3956706387,
+          "stddev_ns": 6946431,
+          "avg_ts": 32.350205,
+          "stddev_ts": 0.056849,
+          "samples_ns": [
+            3960432764,
+            3960994178,
+            3948692220
+          ],
+          "samples_ts": [
+            32.3197,
+            32.3151,
+            32.4158
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 260
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:01:01.272784+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:00:11Z\",\n    \"avg_ns\": 321390182,\n    \"stddev_ns\": 698103,\n    \"avg_ts\": 398.271041,\n    \"stddev_ts\": 0.864388,\n    \"samples_ns\": [ 322163783, 320807157, 321199606 ],\n    \"samples_ts\": [ 397.313, 398.994, 398.506 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:00:13Z\",\n    \"avg_ns\": 16038815605,\n    \"stddev_ns\": 14419526,\n    \"avg_ts\": 31.922574,\n    \"stddev_ts\": 0.028712,\n    \"samples_ns\": [ 16022235224, 16048413034, 16045798559 ],\n    \"samples_ts\": [ 31.9556, 31.9035, 31.9087 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:00:11Z",
+          "avg_ns": 321390182,
+          "stddev_ns": 698103,
+          "avg_ts": 398.271041,
+          "stddev_ts": 0.864388,
+          "samples_ns": [
+            322163783,
+            320807157,
+            321199606
+          ],
+          "samples_ts": [
+            397.313,
+            398.994,
+            398.506
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:00:13Z",
+          "avg_ns": 16038815605,
+          "stddev_ns": 14419526,
+          "avg_ts": 31.922574,
+          "stddev_ts": 0.028712,
+          "samples_ns": [
+            16022235224,
+            16048413034,
+            16045798559
+          ],
+          "samples_ts": [
+            31.9556,
+            31.9035,
+            31.9087
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 261
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:01:21.744220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:01:02Z\",\n    \"avg_ns\": 1909483385,\n    \"stddev_ns\": 3184757,\n    \"avg_ts\": 268.135849,\n    \"stddev_ts\": 0.446819,\n    \"samples_ns\": [ 1906902714, 1908505691, 1913041752 ],\n    \"samples_ts\": [ 268.498, 268.273, 267.637 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:01:09Z\",\n    \"avg_ns\": 3994009762,\n    \"stddev_ns\": 3057800,\n    \"avg_ts\": 32.048006,\n    \"stddev_ts\": 0.024546,\n    \"samples_ns\": [ 3996152688, 3995368535, 3990508063 ],\n    \"samples_ts\": [ 32.0308, 32.0371, 32.0761 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:01:02Z",
+          "avg_ns": 1909483385,
+          "stddev_ns": 3184757,
+          "avg_ts": 268.135849,
+          "stddev_ts": 0.446819,
+          "samples_ns": [
+            1906902714,
+            1908505691,
+            1913041752
+          ],
+          "samples_ts": [
+            268.498,
+            268.273,
+            267.637
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:01:09Z",
+          "avg_ns": 3994009762,
+          "stddev_ns": 3057800,
+          "avg_ts": 32.048006,
+          "stddev_ts": 0.024546,
+          "samples_ns": [
+            3996152688,
+            3995368535,
+            3990508063
+          ],
+          "samples_ts": [
+            32.0308,
+            32.0371,
+            32.0761
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 262
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:02:17.814922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:01:22Z\",\n    \"avg_ns\": 1731855598,\n    \"stddev_ns\": 2555545,\n    \"avg_ts\": 295.637087,\n    \"stddev_ts\": 0.436363,\n    \"samples_ns\": [ 1729157222, 1734239140, 1732170432 ],\n    \"samples_ts\": [ 296.098, 295.23, 295.583 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:01:29Z\",\n    \"avg_ns\": 16088238846,\n    \"stddev_ns\": 27432854,\n    \"avg_ts\": 31.824552,\n    \"stddev_ts\": 0.054273,\n    \"samples_ns\": [ 16060046072, 16114841429, 16089829039 ],\n    \"samples_ts\": [ 31.8804, 31.772, 31.8213 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:01:22Z",
+          "avg_ns": 1731855598,
+          "stddev_ns": 2555545,
+          "avg_ts": 295.637087,
+          "stddev_ts": 0.436363,
+          "samples_ns": [
+            1729157222,
+            1734239140,
+            1732170432
+          ],
+          "samples_ts": [
+            296.098,
+            295.23,
+            295.583
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:01:29Z",
+          "avg_ns": 16088238846,
+          "stddev_ns": 27432854,
+          "avg_ts": 31.824552,
+          "stddev_ts": 0.054273,
+          "samples_ns": [
+            16060046072,
+            16114841429,
+            16089829039
+          ],
+          "samples_ts": [
+            31.8804,
+            31.772,
+            31.8213
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 263
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:02:31.928742+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:02:18Z\",\n    \"avg_ns\": 323049308,\n    \"stddev_ns\": 860711,\n    \"avg_ts\": 396.226217,\n    \"stddev_ts\": 1.054898,\n    \"samples_ns\": [ 323981021, 322283856, 322883047 ],\n    \"samples_ts\": [ 395.085, 397.165, 396.428 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:02:19Z\",\n    \"avg_ns\": 3990973348,\n    \"stddev_ns\": 2561115,\n    \"avg_ts\": 32.072385,\n    \"stddev_ts\": 0.020569,\n    \"samples_ns\": [ 3993883076, 3989065237, 3989971732 ],\n    \"samples_ts\": [ 32.049, 32.0877, 32.0804 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:02:18Z",
+          "avg_ns": 323049308,
+          "stddev_ns": 860711,
+          "avg_ts": 396.226217,
+          "stddev_ts": 1.054898,
+          "samples_ns": [
+            323981021,
+            322283856,
+            322883047
+          ],
+          "samples_ts": [
+            395.085,
+            397.165,
+            396.428
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:02:19Z",
+          "avg_ns": 3990973348,
+          "stddev_ns": 2561115,
+          "avg_ts": 32.072385,
+          "stddev_ts": 0.020569,
+          "samples_ns": [
+            3993883076,
+            3989065237,
+            3989971732
+          ],
+          "samples_ts": [
+            32.049,
+            32.0877,
+            32.0804
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 264
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:03:22.919008+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:02:32Z\",\n    \"avg_ns\": 327900970,\n    \"stddev_ns\": 565469,\n    \"avg_ts\": 390.362533,\n    \"stddev_ts\": 0.672769,\n    \"samples_ns\": [ 328414747, 327992114, 327296051 ],\n    \"samples_ts\": [ 389.751, 390.253, 391.083 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:02:33Z\",\n    \"avg_ns\": 16270845854,\n    \"stddev_ns\": 38854231,\n    \"avg_ts\": 31.467445,\n    \"stddev_ts\": 0.075044,\n    \"samples_ns\": [ 16244691498, 16252354104, 16315491961 ],\n    \"samples_ts\": [ 31.518, 31.5031, 31.3812 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:02:32Z",
+          "avg_ns": 327900970,
+          "stddev_ns": 565469,
+          "avg_ts": 390.362533,
+          "stddev_ts": 0.672769,
+          "samples_ns": [
+            328414747,
+            327992114,
+            327296051
+          ],
+          "samples_ts": [
+            389.751,
+            390.253,
+            391.083
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:02:33Z",
+          "avg_ns": 16270845854,
+          "stddev_ns": 38854231,
+          "avg_ts": 31.467445,
+          "stddev_ts": 0.075044,
+          "samples_ns": [
+            16244691498,
+            16252354104,
+            16315491961
+          ],
+          "samples_ts": [
+            31.518,
+            31.5031,
+            31.3812
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 265
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:03:40.938621+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:03:23Z\",\n    \"avg_ns\": 1308295441,\n    \"stddev_ns\": 855707,\n    \"avg_ts\": 391.349026,\n    \"stddev_ts\": 0.255952,\n    \"samples_ns\": [ 1307466298, 1309175444, 1308244581 ],\n    \"samples_ts\": [ 391.597, 391.086, 391.364 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:03:28Z\",\n    \"avg_ns\": 3972387086,\n    \"stddev_ns\": 6575413,\n    \"avg_ts\": 32.222498,\n    \"stddev_ts\": 0.053287,\n    \"samples_ns\": [ 3979976495, 3968784000, 3968400763 ],\n    \"samples_ts\": [ 32.161, 32.2517, 32.2548 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:03:23Z",
+          "avg_ns": 1308295441,
+          "stddev_ns": 855707,
+          "avg_ts": 391.349026,
+          "stddev_ts": 0.255952,
+          "samples_ns": [
+            1307466298,
+            1309175444,
+            1308244581
+          ],
+          "samples_ts": [
+            391.597,
+            391.086,
+            391.364
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:03:28Z",
+          "avg_ns": 3972387086,
+          "stddev_ns": 6575413,
+          "avg_ts": 32.222498,
+          "stddev_ts": 0.053287,
+          "samples_ns": [
+            3979976495,
+            3968784000,
+            3968400763
+          ],
+          "samples_ts": [
+            32.161,
+            32.2517,
+            32.2548
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 266
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:04:35.358938+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:03:41Z\",\n    \"avg_ns\": 1313373629,\n    \"stddev_ns\": 1302370,\n    \"avg_ts\": 389.836010,\n    \"stddev_ts\": 0.386764,\n    \"samples_ns\": [ 1313895234, 1311891305, 1314334348 ],\n    \"samples_ts\": [ 389.681, 390.276, 389.551 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:03:46Z\",\n    \"avg_ns\": 16100926796,\n    \"stddev_ns\": 607368,\n    \"avg_ts\": 31.799412,\n    \"stddev_ts\": 0.001200,\n    \"samples_ns\": [ 16100229869, 16101343199, 16101207320 ],\n    \"samples_ts\": [ 31.8008, 31.7986, 31.7989 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:03:41Z",
+          "avg_ns": 1313373629,
+          "stddev_ns": 1302370,
+          "avg_ts": 389.83601,
+          "stddev_ts": 0.386764,
+          "samples_ns": [
+            1313895234,
+            1311891305,
+            1314334348
+          ],
+          "samples_ts": [
+            389.681,
+            390.276,
+            389.551
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:03:46Z",
+          "avg_ns": 16100926796,
+          "stddev_ns": 607368,
+          "avg_ts": 31.799412,
+          "stddev_ts": 0.0012,
+          "samples_ns": [
+            16100229869,
+            16101343199,
+            16101207320
+          ],
+          "samples_ts": [
+            31.8008,
+            31.7986,
+            31.7989
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 267
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:04:49.426868+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:04:36Z\",\n    \"avg_ns\": 320361704,\n    \"stddev_ns\": 677039,\n    \"avg_ts\": 399.549568,\n    \"stddev_ts\": 0.844294,\n    \"samples_ns\": [ 319629963, 320964550, 320490601 ],\n    \"samples_ts\": [ 400.463, 398.798, 399.388 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:04:37Z\",\n    \"avg_ns\": 3963833035,\n    \"stddev_ns\": 11960706,\n    \"avg_ts\": 32.292172,\n    \"stddev_ts\": 0.097603,\n    \"samples_ns\": [ 3950060914, 3969826305, 3971611888 ],\n    \"samples_ts\": [ 32.4046, 32.2432, 32.2287 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:04:36Z",
+          "avg_ns": 320361704,
+          "stddev_ns": 677039,
+          "avg_ts": 399.549568,
+          "stddev_ts": 0.844294,
+          "samples_ns": [
+            319629963,
+            320964550,
+            320490601
+          ],
+          "samples_ts": [
+            400.463,
+            398.798,
+            399.388
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:04:37Z",
+          "avg_ns": 3963833035,
+          "stddev_ns": 11960706,
+          "avg_ts": 32.292172,
+          "stddev_ts": 0.097603,
+          "samples_ns": [
+            3950060914,
+            3969826305,
+            3971611888
+          ],
+          "samples_ts": [
+            32.4046,
+            32.2432,
+            32.2287
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 268
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:05:39.699511+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:04:50Z\",\n    \"avg_ns\": 322349165,\n    \"stddev_ns\": 1042642,\n    \"avg_ts\": 397.087710,\n    \"stddev_ts\": 1.283098,\n    \"samples_ns\": [ 322158077, 321415440, 323473979 ],\n    \"samples_ts\": [ 397.32, 398.238, 395.704 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:04:51Z\",\n    \"avg_ns\": 16045691285,\n    \"stddev_ns\": 25553461,\n    \"avg_ts\": 31.908932,\n    \"stddev_ts\": 0.050820,\n    \"samples_ns\": [ 16070742217, 16046667726, 16019663913 ],\n    \"samples_ts\": [ 31.8591, 31.9069, 31.9607 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:04:50Z",
+          "avg_ns": 322349165,
+          "stddev_ns": 1042642,
+          "avg_ts": 397.08771,
+          "stddev_ts": 1.283098,
+          "samples_ns": [
+            322158077,
+            321415440,
+            323473979
+          ],
+          "samples_ts": [
+            397.32,
+            398.238,
+            395.704
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:04:51Z",
+          "avg_ns": 16045691285,
+          "stddev_ns": 25553461,
+          "avg_ts": 31.908932,
+          "stddev_ts": 0.05082,
+          "samples_ns": [
+            16070742217,
+            16046667726,
+            16019663913
+          ],
+          "samples_ts": [
+            31.8591,
+            31.9069,
+            31.9607
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 269
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:05:58.122127+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:05:40Z\",\n    \"avg_ns\": 1397720382,\n    \"stddev_ns\": 3923070,\n    \"avg_ts\": 366.312671,\n    \"stddev_ts\": 1.027446,\n    \"samples_ns\": [ 1397102945, 1394142822, 1401915380 ],\n    \"samples_ts\": [ 366.473, 367.251, 365.215 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:05:46Z\",\n    \"avg_ns\": 3988161917,\n    \"stddev_ns\": 9441470,\n    \"avg_ts\": 32.095106,\n    \"stddev_ts\": 0.075965,\n    \"samples_ns\": [ 3987657662, 3997845199, 3978982891 ],\n    \"samples_ts\": [ 32.099, 32.0172, 32.169 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:05:40Z",
+          "avg_ns": 1397720382,
+          "stddev_ns": 3923070,
+          "avg_ts": 366.312671,
+          "stddev_ts": 1.027446,
+          "samples_ns": [
+            1397102945,
+            1394142822,
+            1401915380
+          ],
+          "samples_ts": [
+            366.473,
+            367.251,
+            365.215
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:05:46Z",
+          "avg_ns": 3988161917,
+          "stddev_ns": 9441470,
+          "avg_ts": 32.095106,
+          "stddev_ts": 0.075965,
+          "samples_ns": [
+            3987657662,
+            3997845199,
+            3978982891
+          ],
+          "samples_ts": [
+            32.099,
+            32.0172,
+            32.169
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 270
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:06:52.734718+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:05:58Z\",\n    \"avg_ns\": 1393827465,\n    \"stddev_ns\": 2916754,\n    \"avg_ts\": 367.334914,\n    \"stddev_ts\": 0.767704,\n    \"samples_ns\": [ 1392093807, 1392193918, 1397194671 ],\n    \"samples_ts\": [ 367.791, 367.765, 366.449 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:06:04Z\",\n    \"avg_ns\": 16061833441,\n    \"stddev_ns\": 10466226,\n    \"avg_ts\": 31.876818,\n    \"stddev_ts\": 0.020774,\n    \"samples_ns\": [ 16050890875, 16062862080, 16071747368 ],\n    \"samples_ts\": [ 31.8985, 31.8748, 31.8571 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:05:58Z",
+          "avg_ns": 1393827465,
+          "stddev_ns": 2916754,
+          "avg_ts": 367.334914,
+          "stddev_ts": 0.767704,
+          "samples_ns": [
+            1392093807,
+            1392193918,
+            1397194671
+          ],
+          "samples_ts": [
+            367.791,
+            367.765,
+            366.449
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:06:04Z",
+          "avg_ns": 16061833441,
+          "stddev_ns": 10466226,
+          "avg_ts": 31.876818,
+          "stddev_ts": 0.020774,
+          "samples_ns": [
+            16050890875,
+            16062862080,
+            16071747368
+          ],
+          "samples_ts": [
+            31.8985,
+            31.8748,
+            31.8571
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 271
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:07:06.763597+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:06:53Z\",\n    \"avg_ns\": 321608858,\n    \"stddev_ns\": 1946203,\n    \"avg_ts\": 398.008677,\n    \"stddev_ts\": 2.401649,\n    \"samples_ns\": [ 320898117, 323810534, 320117923 ],\n    \"samples_ts\": [ 398.88, 395.293, 399.853 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:06:54Z\",\n    \"avg_ns\": 3966520755,\n    \"stddev_ns\": 7332516,\n    \"avg_ts\": 32.270168,\n    \"stddev_ts\": 0.059620,\n    \"samples_ns\": [ 3974491052, 3965008988, 3960062226 ],\n    \"samples_ts\": [ 32.2054, 32.2824, 32.3227 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:06:53Z",
+          "avg_ns": 321608858,
+          "stddev_ns": 1946203,
+          "avg_ts": 398.008677,
+          "stddev_ts": 2.401649,
+          "samples_ns": [
+            320898117,
+            323810534,
+            320117923
+          ],
+          "samples_ts": [
+            398.88,
+            395.293,
+            399.853
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:06:54Z",
+          "avg_ns": 3966520755,
+          "stddev_ns": 7332516,
+          "avg_ts": 32.270168,
+          "stddev_ts": 0.05962,
+          "samples_ns": [
+            3974491052,
+            3965008988,
+            3960062226
+          ],
+          "samples_ts": [
+            32.2054,
+            32.2824,
+            32.3227
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 272
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:07:57.271664+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:07:07Z\",\n    \"avg_ns\": 322249807,\n    \"stddev_ns\": 3606525,\n    \"avg_ts\": 397.240333,\n    \"stddev_ts\": 4.417271,\n    \"samples_ns\": [ 320206664, 326414021, 320128736 ],\n    \"samples_ts\": [ 399.742, 392.14, 399.839 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:07:08Z\",\n    \"avg_ns\": 16126129717,\n    \"stddev_ns\": 13194954,\n    \"avg_ts\": 31.749728,\n    \"stddev_ts\": 0.025970,\n    \"samples_ns\": [ 16140629982, 16122928962, 16114830208 ],\n    \"samples_ts\": [ 31.7212, 31.756, 31.772 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:07:07Z",
+          "avg_ns": 322249807,
+          "stddev_ns": 3606525,
+          "avg_ts": 397.240333,
+          "stddev_ts": 4.417271,
+          "samples_ns": [
+            320206664,
+            326414021,
+            320128736
+          ],
+          "samples_ts": [
+            399.742,
+            392.14,
+            399.839
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:07:08Z",
+          "avg_ns": 16126129717,
+          "stddev_ns": 13194954,
+          "avg_ts": 31.749728,
+          "stddev_ts": 0.02597,
+          "samples_ns": [
+            16140629982,
+            16122928962,
+            16114830208
+          ],
+          "samples_ts": [
+            31.7212,
+            31.756,
+            31.772
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 273
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:08:17.756294+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:07:58Z\",\n    \"avg_ns\": 1897271225,\n    \"stddev_ns\": 1985578,\n    \"avg_ts\": 269.861455,\n    \"stddev_ts\": 0.282350,\n    \"samples_ns\": [ 1895473733, 1896937376, 1899402566 ],\n    \"samples_ts\": [ 270.117, 269.909, 269.558 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:08:05Z\",\n    \"avg_ns\": 4013522974,\n    \"stddev_ns\": 17529452,\n    \"avg_ts\": 31.892586,\n    \"stddev_ts\": 0.139026,\n    \"samples_ns\": [ 4033204207, 4007776319, 3999588398 ],\n    \"samples_ts\": [ 31.7366, 31.9379, 32.0033 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:07:58Z",
+          "avg_ns": 1897271225,
+          "stddev_ns": 1985578,
+          "avg_ts": 269.861455,
+          "stddev_ts": 0.28235,
+          "samples_ns": [
+            1895473733,
+            1896937376,
+            1899402566
+          ],
+          "samples_ts": [
+            270.117,
+            269.909,
+            269.558
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:08:05Z",
+          "avg_ns": 4013522974,
+          "stddev_ns": 17529452,
+          "avg_ts": 31.892586,
+          "stddev_ts": 0.139026,
+          "samples_ns": [
+            4033204207,
+            4007776319,
+            3999588398
+          ],
+          "samples_ts": [
+            31.7366,
+            31.9379,
+            32.0033
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 274
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:09:13.687814+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:08:18Z\",\n    \"avg_ns\": 1675201527,\n    \"stddev_ns\": 8317234,\n    \"avg_ts\": 305.639882,\n    \"stddev_ts\": 1.514315,\n    \"samples_ns\": [ 1668472505, 1672631823, 1684500254 ],\n    \"samples_ts\": [ 306.868, 306.104, 303.948 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:08:25Z\",\n    \"avg_ns\": 16118489465,\n    \"stddev_ns\": 27384422,\n    \"avg_ts\": 31.764824,\n    \"stddev_ts\": 0.053995,\n    \"samples_ns\": [ 16088642125, 16124371078, 16142455192 ],\n    \"samples_ts\": [ 31.8237, 31.7532, 31.7176 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:08:18Z",
+          "avg_ns": 1675201527,
+          "stddev_ns": 8317234,
+          "avg_ts": 305.639882,
+          "stddev_ts": 1.514315,
+          "samples_ns": [
+            1668472505,
+            1672631823,
+            1684500254
+          ],
+          "samples_ts": [
+            306.868,
+            306.104,
+            303.948
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:08:25Z",
+          "avg_ns": 16118489465,
+          "stddev_ns": 27384422,
+          "avg_ts": 31.764824,
+          "stddev_ts": 0.053995,
+          "samples_ns": [
+            16088642125,
+            16124371078,
+            16142455192
+          ],
+          "samples_ts": [
+            31.8237,
+            31.7532,
+            31.7176
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 275
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:09:27.753126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:09:14Z\",\n    \"avg_ns\": 320354919,\n    \"stddev_ns\": 668742,\n    \"avg_ts\": 399.558002,\n    \"stddev_ts\": 0.833095,\n    \"samples_ns\": [ 319925532, 321125433, 320013792 ],\n    \"samples_ts\": [ 400.093, 398.598, 399.983 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:09:15Z\",\n    \"avg_ns\": 3979894863,\n    \"stddev_ns\": 2925416,\n    \"avg_ts\": 32.161665,\n    \"stddev_ts\": 0.023625,\n    \"samples_ns\": [ 3983252563, 3977902208, 3978529819 ],\n    \"samples_ts\": [ 32.1345, 32.1778, 32.1727 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:09:14Z",
+          "avg_ns": 320354919,
+          "stddev_ns": 668742,
+          "avg_ts": 399.558002,
+          "stddev_ts": 0.833095,
+          "samples_ns": [
+            319925532,
+            321125433,
+            320013792
+          ],
+          "samples_ts": [
+            400.093,
+            398.598,
+            399.983
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:09:15Z",
+          "avg_ns": 3979894863,
+          "stddev_ns": 2925416,
+          "avg_ts": 32.161665,
+          "stddev_ts": 0.023625,
+          "samples_ns": [
+            3983252563,
+            3977902208,
+            3978529819
+          ],
+          "samples_ts": [
+            32.1345,
+            32.1778,
+            32.1727
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 276
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:10:17.961909+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:09:28Z\",\n    \"avg_ns\": 320718810,\n    \"stddev_ns\": 403069,\n    \"avg_ts\": 399.103921,\n    \"stddev_ts\": 0.501511,\n    \"samples_ns\": [ 320331720, 321136154, 320688556 ],\n    \"samples_ts\": [ 399.586, 398.585, 399.141 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:09:29Z\",\n    \"avg_ns\": 16027842089,\n    \"stddev_ns\": 23346801,\n    \"avg_ts\": 31.944458,\n    \"stddev_ts\": 0.046505,\n    \"samples_ns\": [ 16053700195, 16021513026, 16008313048 ],\n    \"samples_ts\": [ 31.893, 31.957, 31.9834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:09:28Z",
+          "avg_ns": 320718810,
+          "stddev_ns": 403069,
+          "avg_ts": 399.103921,
+          "stddev_ts": 0.501511,
+          "samples_ns": [
+            320331720,
+            321136154,
+            320688556
+          ],
+          "samples_ts": [
+            399.586,
+            398.585,
+            399.141
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:09:29Z",
+          "avg_ns": 16027842089,
+          "stddev_ns": 23346801,
+          "avg_ts": 31.944458,
+          "stddev_ts": 0.046505,
+          "samples_ns": [
+            16053700195,
+            16021513026,
+            16008313048
+          ],
+          "samples_ts": [
+            31.893,
+            31.957,
+            31.9834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 277
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:10:36.281917+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:10:18Z\",\n    \"avg_ns\": 1341024874,\n    \"stddev_ns\": 1105066,\n    \"avg_ts\": 381.797714,\n    \"stddev_ts\": 0.314144,\n    \"samples_ns\": [ 1342280400, 1340587457, 1340206767 ],\n    \"samples_ts\": [ 381.44, 381.922, 382.031 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:10:24Z\",\n    \"avg_ns\": 4036634555,\n    \"stddev_ns\": 6198048,\n    \"avg_ts\": 31.709633,\n    \"stddev_ts\": 0.048726,\n    \"samples_ns\": [ 4040457531, 4029484139, 4039961997 ],\n    \"samples_ts\": [ 31.6796, 31.7659, 31.6835 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:10:18Z",
+          "avg_ns": 1341024874,
+          "stddev_ns": 1105066,
+          "avg_ts": 381.797714,
+          "stddev_ts": 0.314144,
+          "samples_ns": [
+            1342280400,
+            1340587457,
+            1340206767
+          ],
+          "samples_ts": [
+            381.44,
+            381.922,
+            382.031
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:10:24Z",
+          "avg_ns": 4036634555,
+          "stddev_ns": 6198048,
+          "avg_ts": 31.709633,
+          "stddev_ts": 0.048726,
+          "samples_ns": [
+            4040457531,
+            4029484139,
+            4039961997
+          ],
+          "samples_ts": [
+            31.6796,
+            31.7659,
+            31.6835
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 278
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:11:30.586596+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:10:37Z\",\n    \"avg_ns\": 1313563824,\n    \"stddev_ns\": 1059505,\n    \"avg_ts\": 389.779478,\n    \"stddev_ts\": 0.314148,\n    \"samples_ns\": [ 1313393459, 1312600451, 1314697563 ],\n    \"samples_ts\": [ 389.83, 390.065, 389.443 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:10:42Z\",\n    \"avg_ns\": 16066921650,\n    \"stddev_ns\": 6623880,\n    \"avg_ts\": 31.866718,\n    \"stddev_ts\": 0.013139,\n    \"samples_ns\": [ 16059897993, 16073055876, 16067811081 ],\n    \"samples_ts\": [ 31.8807, 31.8546, 31.865 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:10:37Z",
+          "avg_ns": 1313563824,
+          "stddev_ns": 1059505,
+          "avg_ts": 389.779478,
+          "stddev_ts": 0.314148,
+          "samples_ns": [
+            1313393459,
+            1312600451,
+            1314697563
+          ],
+          "samples_ts": [
+            389.83,
+            390.065,
+            389.443
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:10:42Z",
+          "avg_ns": 16066921650,
+          "stddev_ns": 6623880,
+          "avg_ts": 31.866718,
+          "stddev_ts": 0.013139,
+          "samples_ns": [
+            16059897993,
+            16073055876,
+            16067811081
+          ],
+          "samples_ts": [
+            31.8807,
+            31.8546,
+            31.865
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 279
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:11:44.614935+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:11:31Z\",\n    \"avg_ns\": 319739596,\n    \"stddev_ns\": 883230,\n    \"avg_ts\": 400.327805,\n    \"stddev_ts\": 1.105040,\n    \"samples_ns\": [ 319578810, 320692174, 318947804 ],\n    \"samples_ts\": [ 400.527, 399.137, 401.32 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:11:32Z\",\n    \"avg_ns\": 3966315706,\n    \"stddev_ns\": 7658536,\n    \"avg_ts\": 32.271843,\n    \"stddev_ts\": 0.062274,\n    \"samples_ns\": [ 3964557607, 3959689091, 3974700420 ],\n    \"samples_ts\": [ 32.2861, 32.3258, 32.2037 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:11:31Z",
+          "avg_ns": 319739596,
+          "stddev_ns": 883230,
+          "avg_ts": 400.327805,
+          "stddev_ts": 1.10504,
+          "samples_ns": [
+            319578810,
+            320692174,
+            318947804
+          ],
+          "samples_ts": [
+            400.527,
+            399.137,
+            401.32
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:11:32Z",
+          "avg_ns": 3966315706,
+          "stddev_ns": 7658536,
+          "avg_ts": 32.271843,
+          "stddev_ts": 0.062274,
+          "samples_ns": [
+            3964557607,
+            3959689091,
+            3974700420
+          ],
+          "samples_ts": [
+            32.2861,
+            32.3258,
+            32.2037
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 280
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:12:35.321012+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:11:45Z\",\n    \"avg_ns\": 321662014,\n    \"stddev_ns\": 4608755,\n    \"avg_ts\": 397.987254,\n    \"stddev_ts\": 5.657269,\n    \"samples_ns\": [ 318593947, 326961752, 319430344 ],\n    \"samples_ts\": [ 401.765, 391.483, 400.713 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:11:46Z\",\n    \"avg_ns\": 16189232215,\n    \"stddev_ns\": 9628203,\n    \"avg_ts\": 31.625967,\n    \"stddev_ts\": 0.018810,\n    \"samples_ns\": [ 16197827061, 16191040739, 16178828846 ],\n    \"samples_ts\": [ 31.6092, 31.6224, 31.6463 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:11:45Z",
+          "avg_ns": 321662014,
+          "stddev_ns": 4608755,
+          "avg_ts": 397.987254,
+          "stddev_ts": 5.657269,
+          "samples_ns": [
+            318593947,
+            326961752,
+            319430344
+          ],
+          "samples_ts": [
+            401.765,
+            391.483,
+            400.713
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:11:46Z",
+          "avg_ns": 16189232215,
+          "stddev_ns": 9628203,
+          "avg_ts": 31.625967,
+          "stddev_ts": 0.01881,
+          "samples_ns": [
+            16197827061,
+            16191040739,
+            16178828846
+          ],
+          "samples_ts": [
+            31.6092,
+            31.6224,
+            31.6463
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 281
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:12:53.722469+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:12:36Z\",\n    \"avg_ns\": 1395446756,\n    \"stddev_ns\": 5291732,\n    \"avg_ts\": 366.911100,\n    \"stddev_ts\": 1.390158,\n    \"samples_ns\": [ 1390612617, 1401100238, 1394627414 ],\n    \"samples_ts\": [ 368.183, 365.427, 367.123 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:12:41Z\",\n    \"avg_ns\": 3990262552,\n    \"stddev_ns\": 10326195,\n    \"avg_ts\": 32.078233,\n    \"stddev_ts\": 0.083136,\n    \"samples_ns\": [ 3996376733, 3978340423, 3996070501 ],\n    \"samples_ts\": [ 32.029, 32.1742, 32.0315 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:12:36Z",
+          "avg_ns": 1395446756,
+          "stddev_ns": 5291732,
+          "avg_ts": 366.9111,
+          "stddev_ts": 1.390158,
+          "samples_ns": [
+            1390612617,
+            1401100238,
+            1394627414
+          ],
+          "samples_ts": [
+            368.183,
+            365.427,
+            367.123
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:12:41Z",
+          "avg_ns": 3990262552,
+          "stddev_ns": 10326195,
+          "avg_ts": 32.078233,
+          "stddev_ts": 0.083136,
+          "samples_ns": [
+            3996376733,
+            3978340423,
+            3996070501
+          ],
+          "samples_ts": [
+            32.029,
+            32.1742,
+            32.0315
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 282
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:13:48.912715+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:12:54Z\",\n    \"avg_ns\": 1473079808,\n    \"stddev_ns\": 2266331,\n    \"avg_ts\": 347.571669,\n    \"stddev_ts\": 0.534702,\n    \"samples_ns\": [ 1470848842, 1473010659, 1475379923 ],\n    \"samples_ts\": [ 348.098, 347.587, 347.029 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:13:00Z\",\n    \"avg_ns\": 16131952614,\n    \"stddev_ns\": 9995396,\n    \"avg_ts\": 31.738262,\n    \"stddev_ts\": 0.019658,\n    \"samples_ns\": [ 16123203948, 16142843949, 16129809947 ],\n    \"samples_ts\": [ 31.7555, 31.7168, 31.7425 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:12:54Z",
+          "avg_ns": 1473079808,
+          "stddev_ns": 2266331,
+          "avg_ts": 347.571669,
+          "stddev_ts": 0.534702,
+          "samples_ns": [
+            1470848842,
+            1473010659,
+            1475379923
+          ],
+          "samples_ts": [
+            348.098,
+            347.587,
+            347.029
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:13:00Z",
+          "avg_ns": 16131952614,
+          "stddev_ns": 9995396,
+          "avg_ts": 31.738262,
+          "stddev_ts": 0.019658,
+          "samples_ns": [
+            16123203948,
+            16142843949,
+            16129809947
+          ],
+          "samples_ts": [
+            31.7555,
+            31.7168,
+            31.7425
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 283
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:14:02.905128+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:13:49Z\",\n    \"avg_ns\": 317954418,\n    \"stddev_ns\": 767709,\n    \"avg_ts\": 402.574990,\n    \"stddev_ts\": 0.971601,\n    \"samples_ns\": [ 318711291, 317177108, 317974857 ],\n    \"samples_ts\": [ 401.617, 403.56, 402.548 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:13:50Z\",\n    \"avg_ns\": 3956876974,\n    \"stddev_ns\": 261019,\n    \"avg_ts\": 32.348744,\n    \"stddev_ts\": 0.002006,\n    \"samples_ns\": [ 3956995320, 3956594837, 3957040767 ],\n    \"samples_ts\": [ 32.3478, 32.3511, 32.3474 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:13:49Z",
+          "avg_ns": 317954418,
+          "stddev_ns": 767709,
+          "avg_ts": 402.57499,
+          "stddev_ts": 0.971601,
+          "samples_ns": [
+            318711291,
+            317177108,
+            317974857
+          ],
+          "samples_ts": [
+            401.617,
+            403.56,
+            402.548
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:13:50Z",
+          "avg_ns": 3956876974,
+          "stddev_ns": 261019,
+          "avg_ts": 32.348744,
+          "stddev_ts": 0.002006,
+          "samples_ns": [
+            3956995320,
+            3956594837,
+            3957040767
+          ],
+          "samples_ts": [
+            32.3478,
+            32.3511,
+            32.3474
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 284
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:14:54.293384+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:14:03Z\",\n    \"avg_ns\": 326578880,\n    \"stddev_ns\": 599861,\n    \"avg_ts\": 391.942944,\n    \"stddev_ts\": 0.719673,\n    \"samples_ns\": [ 327213616, 326021373, 326501651 ],\n    \"samples_ts\": [ 391.182, 392.612, 392.035 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:14:04Z\",\n    \"avg_ns\": 16398435288,\n    \"stddev_ns\": 31029175,\n    \"avg_ts\": 31.222566,\n    \"stddev_ts\": 0.059016,\n    \"samples_ns\": [ 16434156357, 16378167006, 16382982502 ],\n    \"samples_ts\": [ 31.1546, 31.2611, 31.2519 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:14:03Z",
+          "avg_ns": 326578880,
+          "stddev_ns": 599861,
+          "avg_ts": 391.942944,
+          "stddev_ts": 0.719673,
+          "samples_ns": [
+            327213616,
+            326021373,
+            326501651
+          ],
+          "samples_ts": [
+            391.182,
+            392.612,
+            392.035
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:14:04Z",
+          "avg_ns": 16398435288,
+          "stddev_ns": 31029175,
+          "avg_ts": 31.222566,
+          "stddev_ts": 0.059016,
+          "samples_ns": [
+            16434156357,
+            16378167006,
+            16382982502
+          ],
+          "samples_ts": [
+            31.1546,
+            31.2611,
+            31.2519
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 285
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:15:14.644652+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:14:55Z\",\n    \"avg_ns\": 1901672799,\n    \"stddev_ns\": 5163373,\n    \"avg_ts\": 269.237964,\n    \"stddev_ts\": 0.729845,\n    \"samples_ns\": [ 1898433440, 1907626823, 1898958136 ],\n    \"samples_ts\": [ 269.696, 268.396, 269.622 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:15:02Z\",\n    \"avg_ns\": 3965582436,\n    \"stddev_ns\": 8122820,\n    \"avg_ts\": 32.277821,\n    \"stddev_ts\": 0.066192,\n    \"samples_ns\": [ 3970449590, 3956205564, 3970092155 ],\n    \"samples_ts\": [ 32.2382, 32.3542, 32.2411 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:14:55Z",
+          "avg_ns": 1901672799,
+          "stddev_ns": 5163373,
+          "avg_ts": 269.237964,
+          "stddev_ts": 0.729845,
+          "samples_ns": [
+            1898433440,
+            1907626823,
+            1898958136
+          ],
+          "samples_ts": [
+            269.696,
+            268.396,
+            269.622
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:15:02Z",
+          "avg_ns": 3965582436,
+          "stddev_ns": 8122820,
+          "avg_ts": 32.277821,
+          "stddev_ts": 0.066192,
+          "samples_ns": [
+            3970449590,
+            3956205564,
+            3970092155
+          ],
+          "samples_ts": [
+            32.2382,
+            32.3542,
+            32.2411
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 286
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:16:10.814258+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:15:15Z\",\n    \"avg_ns\": 1724102571,\n    \"stddev_ns\": 1423761,\n    \"avg_ts\": 296.966225,\n    \"stddev_ts\": 0.245316,\n    \"samples_ns\": [ 1724523033, 1722515930, 1725268750 ],\n    \"samples_ts\": [ 296.894, 297.24, 296.765 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 270M Q8_0\",\n    \"model_size\": 285018624,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:15:22Z\",\n    \"avg_ns\": 16140432115,\n    \"stddev_ns\": 15402455,\n    \"avg_ts\": 31.721599,\n    \"stddev_ts\": 0.030286,\n    \"samples_ns\": [ 16148295719, 16150314298, 16122686330 ],\n    \"samples_ts\": [ 31.7061, 31.7022, 31.7565 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:15:15Z",
+          "avg_ns": 1724102571,
+          "stddev_ns": 1423761,
+          "avg_ts": 296.966225,
+          "stddev_ts": 0.245316,
+          "samples_ns": [
+            1724523033,
+            1722515930,
+            1725268750
+          ],
+          "samples_ts": [
+            296.894,
+            297.24,
+            296.765
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+          "model_type": "gemma3 270M Q8_0",
+          "model_size": 285018624,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:15:22Z",
+          "avg_ns": 16140432115,
+          "stddev_ns": 15402455,
+          "avg_ts": 31.721599,
+          "stddev_ts": 0.030286,
+          "samples_ns": [
+            16148295719,
+            16150314298,
+            16122686330
+          ],
+          "samples_ts": [
+            31.7061,
+            31.7022,
+            31.7565
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 287
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:16:29.892198+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:16:14Z\",\n    \"avg_ns\": 1120158031,\n    \"stddev_ns\": 12212326,\n    \"avg_ts\": 114.278590,\n    \"stddev_ts\": 1.238100,\n    \"samples_ns\": [ 1134259485, 1113127975, 1113086635 ],\n    \"samples_ts\": [ 112.849, 114.991, 114.996 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:16:18Z\",\n    \"avg_ns\": 3692859261,\n    \"stddev_ns\": 1638707,\n    \"avg_ts\": 34.661493,\n    \"stddev_ts\": 0.015360,\n    \"samples_ns\": [ 3692871824, 3691216564, 3694489397 ],\n    \"samples_ts\": [ 34.6614, 34.6769, 34.6462 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:16:14Z",
+          "avg_ns": 1120158031,
+          "stddev_ns": 12212326,
+          "avg_ts": 114.27859,
+          "stddev_ts": 1.2381,
+          "samples_ns": [
+            1134259485,
+            1113127975,
+            1113086635
+          ],
+          "samples_ts": [
+            112.849,
+            114.991,
+            114.996
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:16:18Z",
+          "avg_ns": 3692859261,
+          "stddev_ns": 1638707,
+          "avg_ts": 34.661493,
+          "stddev_ts": 0.01536,
+          "samples_ns": [
+            3692871824,
+            3691216564,
+            3694489397
+          ],
+          "samples_ts": [
+            34.6614,
+            34.6769,
+            34.6462
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 288
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:17:20.949120+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:16:30Z\",\n    \"avg_ns\": 1117132183,\n    \"stddev_ns\": 9295003,\n    \"avg_ts\": 114.584363,\n    \"stddev_ts\": 0.948821,\n    \"samples_ns\": [ 1111692619, 1127864657, 1111839275 ],\n    \"samples_ts\": [ 115.14, 113.489, 115.125 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:16:35Z\",\n    \"avg_ns\": 15242637061,\n    \"stddev_ns\": 14805936,\n    \"avg_ts\": 33.590009,\n    \"stddev_ts\": 0.032607,\n    \"samples_ns\": [ 15259709424, 15233335306, 15234866455 ],\n    \"samples_ts\": [ 33.5524, 33.6105, 33.6071 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:16:30Z",
+          "avg_ns": 1117132183,
+          "stddev_ns": 9295003,
+          "avg_ts": 114.584363,
+          "stddev_ts": 0.948821,
+          "samples_ns": [
+            1111692619,
+            1127864657,
+            1111839275
+          ],
+          "samples_ts": [
+            115.14,
+            113.489,
+            115.125
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:16:35Z",
+          "avg_ns": 15242637061,
+          "stddev_ns": 14805936,
+          "avg_ts": 33.590009,
+          "stddev_ts": 0.032607,
+          "samples_ns": [
+            15259709424,
+            15233335306,
+            15234866455
+          ],
+          "samples_ts": [
+            33.5524,
+            33.6105,
+            33.6071
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 289
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:17:51.401879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:17:21Z\",\n    \"avg_ns\": 4645609031,\n    \"stddev_ns\": 115373,\n    \"avg_ts\": 110.211599,\n    \"stddev_ts\": 0.002208,\n    \"samples_ns\": [ 4645699183, 4645614650, 4645513261 ],\n    \"samples_ts\": [ 110.209, 110.211, 110.214 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:17:40Z\",\n    \"avg_ns\": 3670013316,\n    \"stddev_ns\": 242384,\n    \"avg_ts\": 34.877258,\n    \"stddev_ts\": 0.002230,\n    \"samples_ns\": [ 3670186338, 3669746174, 3670107437 ],\n    \"samples_ts\": [ 34.8756, 34.8798, 34.8764 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:17:21Z",
+          "avg_ns": 4645609031,
+          "stddev_ns": 115373,
+          "avg_ts": 110.211599,
+          "stddev_ts": 0.002208,
+          "samples_ns": [
+            4645699183,
+            4645614650,
+            4645513261
+          ],
+          "samples_ts": [
+            110.209,
+            110.211,
+            110.214
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:17:40Z",
+          "avg_ns": 3670013316,
+          "stddev_ns": 242384,
+          "avg_ts": 34.877258,
+          "stddev_ts": 0.00223,
+          "samples_ns": [
+            3670186338,
+            3669746174,
+            3670107437
+          ],
+          "samples_ts": [
+            34.8756,
+            34.8798,
+            34.8764
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 290
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:18:56.463452+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:17:52Z\",\n    \"avg_ns\": 4639252027,\n    \"stddev_ns\": 345771,\n    \"avg_ts\": 110.362619,\n    \"stddev_ts\": 0.007900,\n    \"samples_ns\": [ 4639587045, 4638922958, 4639246080 ],\n    \"samples_ts\": [ 110.355, 110.37, 110.363 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:18:10Z\",\n    \"avg_ns\": 15199574899,\n    \"stddev_ns\": 2319138,\n    \"avg_ts\": 33.685153,\n    \"stddev_ts\": 0.005125,\n    \"samples_ns\": [ 15198872519, 15202157233, 15197694947 ],\n    \"samples_ts\": [ 33.6867, 33.6794, 33.6893 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:17:52Z",
+          "avg_ns": 4639252027,
+          "stddev_ns": 345771,
+          "avg_ts": 110.362619,
+          "stddev_ts": 0.0079,
+          "samples_ns": [
+            4639587045,
+            4638922958,
+            4639246080
+          ],
+          "samples_ts": [
+            110.355,
+            110.37,
+            110.363
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:18:10Z",
+          "avg_ns": 15199574899,
+          "stddev_ns": 2319138,
+          "avg_ts": 33.685153,
+          "stddev_ts": 0.005125,
+          "samples_ns": [
+            15198872519,
+            15202157233,
+            15197694947
+          ],
+          "samples_ts": [
+            33.6867,
+            33.6794,
+            33.6893
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 291
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:19:12.899087+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:18:57Z\",\n    \"avg_ns\": 1112217658,\n    \"stddev_ns\": 76592,\n    \"avg_ts\": 115.085388,\n    \"stddev_ts\": 0.007925,\n    \"samples_ns\": [ 1112156636, 1112303609, 1112192729 ],\n    \"samples_ts\": [ 115.092, 115.076, 115.088 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:19:01Z\",\n    \"avg_ns\": 3707903467,\n    \"stddev_ns\": 229619,\n    \"avg_ts\": 34.520856,\n    \"stddev_ts\": 0.002138,\n    \"samples_ns\": [ 3707866257, 3707694725, 3708149419 ],\n    \"samples_ts\": [ 34.5212, 34.5228, 34.5186 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:18:57Z",
+          "avg_ns": 1112217658,
+          "stddev_ns": 76592,
+          "avg_ts": 115.085388,
+          "stddev_ts": 0.007925,
+          "samples_ns": [
+            1112156636,
+            1112303609,
+            1112192729
+          ],
+          "samples_ts": [
+            115.092,
+            115.076,
+            115.088
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:19:01Z",
+          "avg_ns": 3707903467,
+          "stddev_ns": 229619,
+          "avg_ts": 34.520856,
+          "stddev_ts": 0.002138,
+          "samples_ns": [
+            3707866257,
+            3707694725,
+            3708149419
+          ],
+          "samples_ts": [
+            34.5212,
+            34.5228,
+            34.5186
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 292
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:20:03.970023+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:19:13Z\",\n    \"avg_ns\": 1111542378,\n    \"stddev_ns\": 187875,\n    \"avg_ts\": 115.155306,\n    \"stddev_ts\": 0.019463,\n    \"samples_ns\": [ 1111363819, 1111524958, 1111738357 ],\n    \"samples_ts\": [ 115.174, 115.157, 115.135 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:19:18Z\",\n    \"avg_ns\": 15253750223,\n    \"stddev_ns\": 18844710,\n    \"avg_ts\": 33.565550,\n    \"stddev_ts\": 0.041495,\n    \"samples_ns\": [ 15231993404, 15264358079, 15264899188 ],\n    \"samples_ts\": [ 33.6135, 33.5422, 33.541 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:19:13Z",
+          "avg_ns": 1111542378,
+          "stddev_ns": 187875,
+          "avg_ts": 115.155306,
+          "stddev_ts": 0.019463,
+          "samples_ns": [
+            1111363819,
+            1111524958,
+            1111738357
+          ],
+          "samples_ts": [
+            115.174,
+            115.157,
+            115.135
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:19:18Z",
+          "avg_ns": 15253750223,
+          "stddev_ns": 18844710,
+          "avg_ts": 33.56555,
+          "stddev_ts": 0.041495,
+          "samples_ns": [
+            15231993404,
+            15264358079,
+            15264899188
+          ],
+          "samples_ts": [
+            33.6135,
+            33.5422,
+            33.541
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 293
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:20:34.492023+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:20:04Z\",\n    \"avg_ns\": 4672640537,\n    \"stddev_ns\": 371087,\n    \"avg_ts\": 109.574019,\n    \"stddev_ts\": 0.008702,\n    \"samples_ns\": [ 4672482083, 4672374981, 4673064547 ],\n    \"samples_ts\": [ 109.578, 109.58, 109.564 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:20:23Z\",\n    \"avg_ns\": 3656846708,\n    \"stddev_ns\": 733114,\n    \"avg_ts\": 35.002835,\n    \"stddev_ts\": 0.006970,\n    \"samples_ns\": [ 3657360580, 3657166050, 3656013496 ],\n    \"samples_ts\": [ 34.9979, 34.9998, 35.0108 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:20:04Z",
+          "avg_ns": 4672640537,
+          "stddev_ns": 371087,
+          "avg_ts": 109.574019,
+          "stddev_ts": 0.008702,
+          "samples_ns": [
+            4672482083,
+            4672374981,
+            4673064547
+          ],
+          "samples_ts": [
+            109.578,
+            109.58,
+            109.564
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:20:23Z",
+          "avg_ns": 3656846708,
+          "stddev_ns": 733114,
+          "avg_ts": 35.002835,
+          "stddev_ts": 0.00697,
+          "samples_ns": [
+            3657360580,
+            3657166050,
+            3656013496
+          ],
+          "samples_ts": [
+            34.9979,
+            34.9998,
+            35.0108
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 294
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:21:39.167667+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:20:35Z\",\n    \"avg_ns\": 4661433270,\n    \"stddev_ns\": 179766,\n    \"avg_ts\": 109.837462,\n    \"stddev_ts\": 0.003573,\n    \"samples_ns\": [ 4661555291, 4661263511, 4661481010 ],\n    \"samples_ts\": [ 109.835, 109.841, 109.836 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:20:53Z\",\n    \"avg_ns\": 15054601459,\n    \"stddev_ns\": 15637534,\n    \"avg_ts\": 34.009560,\n    \"stddev_ts\": 0.035347,\n    \"samples_ns\": [ 15036546193, 15063780587, 15063477598 ],\n    \"samples_ts\": [ 34.0504, 33.9888, 33.9895 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:20:35Z",
+          "avg_ns": 4661433270,
+          "stddev_ns": 179766,
+          "avg_ts": 109.837462,
+          "stddev_ts": 0.003573,
+          "samples_ns": [
+            4661555291,
+            4661263511,
+            4661481010
+          ],
+          "samples_ts": [
+            109.835,
+            109.841,
+            109.836
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:20:53Z",
+          "avg_ns": 15054601459,
+          "stddev_ns": 15637534,
+          "avg_ts": 34.00956,
+          "stddev_ts": 0.035347,
+          "samples_ns": [
+            15036546193,
+            15063780587,
+            15063477598
+          ],
+          "samples_ts": [
+            34.0504,
+            33.9888,
+            33.9895
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 295
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:21:55.475565+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:21:39Z\",\n    \"avg_ns\": 1112267200,\n    \"stddev_ns\": 192063,\n    \"avg_ts\": 115.080264,\n    \"stddev_ts\": 0.019870,\n    \"samples_ns\": [ 1112151958, 1112160724, 1112488918 ],\n    \"samples_ts\": [ 115.092, 115.091, 115.057 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:21:44Z\",\n    \"avg_ns\": 3665181154,\n    \"stddev_ns\": 1366883,\n    \"avg_ts\": 34.923243,\n    \"stddev_ts\": 0.013027,\n    \"samples_ns\": [ 3663616981, 3666145975, 3665780506 ],\n    \"samples_ts\": [ 34.9382, 34.914, 34.9175 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:21:39Z",
+          "avg_ns": 1112267200,
+          "stddev_ns": 192063,
+          "avg_ts": 115.080264,
+          "stddev_ts": 0.01987,
+          "samples_ns": [
+            1112151958,
+            1112160724,
+            1112488918
+          ],
+          "samples_ts": [
+            115.092,
+            115.091,
+            115.057
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:21:44Z",
+          "avg_ns": 3665181154,
+          "stddev_ns": 1366883,
+          "avg_ts": 34.923243,
+          "stddev_ts": 0.013027,
+          "samples_ns": [
+            3663616981,
+            3666145975,
+            3665780506
+          ],
+          "samples_ts": [
+            34.9382,
+            34.914,
+            34.9175
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 296
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:22:46.491338+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:21:56Z\",\n    \"avg_ns\": 1111870249,\n    \"stddev_ns\": 118454,\n    \"avg_ts\": 115.121347,\n    \"stddev_ts\": 0.011769,\n    \"samples_ns\": [ 1111747810, 1111972410, 1111890528 ],\n    \"samples_ts\": [ 115.134, 115.111, 115.119 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:22:00Z\",\n    \"avg_ns\": 15234870471,\n    \"stddev_ns\": 30233142,\n    \"avg_ts\": 33.607200,\n    \"stddev_ts\": 0.066615,\n    \"samples_ns\": [ 15269779130, 15217188997, 15217643288 ],\n    \"samples_ts\": [ 33.5303, 33.6462, 33.6452 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:21:56Z",
+          "avg_ns": 1111870249,
+          "stddev_ns": 118454,
+          "avg_ts": 115.121347,
+          "stddev_ts": 0.011769,
+          "samples_ns": [
+            1111747810,
+            1111972410,
+            1111890528
+          ],
+          "samples_ts": [
+            115.134,
+            115.111,
+            115.119
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:22:00Z",
+          "avg_ns": 15234870471,
+          "stddev_ns": 30233142,
+          "avg_ts": 33.6072,
+          "stddev_ts": 0.066615,
+          "samples_ns": [
+            15269779130,
+            15217188997,
+            15217643288
+          ],
+          "samples_ts": [
+            33.5303,
+            33.6462,
+            33.6452
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 297
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:23:18.218018+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:22:47Z\",\n    \"avg_ns\": 4942523695,\n    \"stddev_ns\": 1306194,\n    \"avg_ts\": 103.590808,\n    \"stddev_ts\": 0.027334,\n    \"samples_ns\": [ 4941499657, 4942079310, 4943992119 ],\n    \"samples_ts\": [ 103.612, 103.6, 103.56 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:23:07Z\",\n    \"avg_ns\": 3697233377,\n    \"stddev_ns\": 416004,\n    \"avg_ts\": 34.620482,\n    \"stddev_ts\": 0.003853,\n    \"samples_ns\": [ 3697707813, 3697019518, 3696972801 ],\n    \"samples_ts\": [ 34.616, 34.6225, 34.6229 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:22:47Z",
+          "avg_ns": 4942523695,
+          "stddev_ns": 1306194,
+          "avg_ts": 103.590808,
+          "stddev_ts": 0.027334,
+          "samples_ns": [
+            4941499657,
+            4942079310,
+            4943992119
+          ],
+          "samples_ts": [
+            103.612,
+            103.6,
+            103.56
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:23:07Z",
+          "avg_ns": 3697233377,
+          "stddev_ns": 416004,
+          "avg_ts": 34.620482,
+          "stddev_ts": 0.003853,
+          "samples_ns": [
+            3697707813,
+            3697019518,
+            3696972801
+          ],
+          "samples_ts": [
+            34.616,
+            34.6225,
+            34.6229
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 298
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:24:24.629492+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:23:18Z\",\n    \"avg_ns\": 4925830120,\n    \"stddev_ns\": 1416635,\n    \"avg_ts\": 103.941877,\n    \"stddev_ts\": 0.029854,\n    \"samples_ns\": [ 4925587570, 4927350612, 4924552179 ],\n    \"samples_ts\": [ 103.947, 103.91, 103.969 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:23:38Z\",\n    \"avg_ns\": 15280506182,\n    \"stddev_ns\": 4913095,\n    \"avg_ts\": 33.506746,\n    \"stddev_ts\": 0.010767,\n    \"samples_ns\": [ 15285381540, 15275562289, 15280574719 ],\n    \"samples_ts\": [ 33.4961, 33.5176, 33.5066 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:23:18Z",
+          "avg_ns": 4925830120,
+          "stddev_ns": 1416635,
+          "avg_ts": 103.941877,
+          "stddev_ts": 0.029854,
+          "samples_ns": [
+            4925587570,
+            4927350612,
+            4924552179
+          ],
+          "samples_ts": [
+            103.947,
+            103.91,
+            103.969
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:23:38Z",
+          "avg_ns": 15280506182,
+          "stddev_ns": 4913095,
+          "avg_ts": 33.506746,
+          "stddev_ts": 0.010767,
+          "samples_ns": [
+            15285381540,
+            15275562289,
+            15280574719
+          ],
+          "samples_ts": [
+            33.4961,
+            33.5176,
+            33.5066
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 299
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:24:41.052070+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:24:25Z\",\n    \"avg_ns\": 1111813886,\n    \"stddev_ns\": 44018,\n    \"avg_ts\": 115.127182,\n    \"stddev_ts\": 0.002976,\n    \"samples_ns\": [ 1111783091, 1111839985, 1111818583 ],\n    \"samples_ts\": [ 115.13, 115.124, 115.127 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:24:29Z\",\n    \"avg_ns\": 3704509332,\n    \"stddev_ns\": 784317,\n    \"avg_ts\": 34.552485,\n    \"stddev_ts\": 0.007293,\n    \"samples_ns\": [ 3704358094, 3705355857, 3703814046 ],\n    \"samples_ts\": [ 34.5539, 34.5446, 34.559 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:24:25Z",
+          "avg_ns": 1111813886,
+          "stddev_ns": 44018,
+          "avg_ts": 115.127182,
+          "stddev_ts": 0.002976,
+          "samples_ns": [
+            1111783091,
+            1111839985,
+            1111818583
+          ],
+          "samples_ts": [
+            115.13,
+            115.124,
+            115.127
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:24:29Z",
+          "avg_ns": 3704509332,
+          "stddev_ns": 784317,
+          "avg_ts": 34.552485,
+          "stddev_ts": 0.007293,
+          "samples_ns": [
+            3704358094,
+            3705355857,
+            3703814046
+          ],
+          "samples_ts": [
+            34.5539,
+            34.5446,
+            34.559
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 300
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:25:31.741072+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:24:41Z\",\n    \"avg_ns\": 1112143409,\n    \"stddev_ns\": 39970,\n    \"avg_ts\": 115.093071,\n    \"stddev_ts\": 0.004136,\n    \"samples_ns\": [ 1112135645, 1112107890, 1112186692 ],\n    \"samples_ts\": [ 115.094, 115.097, 115.089 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:24:46Z\",\n    \"avg_ns\": 15124610929,\n    \"stddev_ns\": 1018337,\n    \"avg_ts\": 33.852111,\n    \"stddev_ts\": 0.002246,\n    \"samples_ns\": [ 15124025589, 15124037693, 15125769507 ],\n    \"samples_ts\": [ 33.8534, 33.8534, 33.8495 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:24:41Z",
+          "avg_ns": 1112143409,
+          "stddev_ns": 39970,
+          "avg_ts": 115.093071,
+          "stddev_ts": 0.004136,
+          "samples_ns": [
+            1112135645,
+            1112107890,
+            1112186692
+          ],
+          "samples_ts": [
+            115.094,
+            115.097,
+            115.089
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:24:46Z",
+          "avg_ns": 15124610929,
+          "stddev_ns": 1018337,
+          "avg_ts": 33.852111,
+          "stddev_ts": 0.002246,
+          "samples_ns": [
+            15124025589,
+            15124037693,
+            15125769507
+          ],
+          "samples_ts": [
+            33.8534,
+            33.8534,
+            33.8495
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 301
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:26:02.144620+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:25:32Z\",\n    \"avg_ns\": 4634149526,\n    \"stddev_ns\": 94654,\n    \"avg_ts\": 110.484135,\n    \"stddev_ts\": 0.001568,\n    \"samples_ns\": [ 4634089244, 4634139669, 4634219666 ],\n    \"samples_ts\": [ 110.486, 110.484, 110.482 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:25:51Z\",\n    \"avg_ns\": 3656811954,\n    \"stddev_ns\": 96609,\n    \"avg_ts\": 35.003167,\n    \"stddev_ts\": 0.000925,\n    \"samples_ns\": [ 3656701225, 3656855584, 3656879053 ],\n    \"samples_ts\": [ 35.0042, 35.0027, 35.0025 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:25:32Z",
+          "avg_ns": 4634149526,
+          "stddev_ns": 94654,
+          "avg_ts": 110.484135,
+          "stddev_ts": 0.001568,
+          "samples_ns": [
+            4634089244,
+            4634139669,
+            4634219666
+          ],
+          "samples_ts": [
+            110.486,
+            110.484,
+            110.482
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:25:51Z",
+          "avg_ns": 3656811954,
+          "stddev_ns": 96609,
+          "avg_ts": 35.003167,
+          "stddev_ts": 0.000925,
+          "samples_ns": [
+            3656701225,
+            3656855584,
+            3656879053
+          ],
+          "samples_ts": [
+            35.0042,
+            35.0027,
+            35.0025
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 302
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:27:07.251859+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:26:02Z\",\n    \"avg_ns\": 4637255941,\n    \"stddev_ns\": 74999,\n    \"avg_ts\": 110.410123,\n    \"stddev_ts\": 0.001786,\n    \"samples_ns\": [ 4637224892, 4637341479, 4637201452 ],\n    \"samples_ts\": [ 110.411, 110.408, 110.411 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:26:21Z\",\n    \"avg_ns\": 15230074317,\n    \"stddev_ns\": 13732462,\n    \"avg_ts\": 33.617714,\n    \"stddev_ts\": 0.030296,\n    \"samples_ns\": [ 15245909960, 15222867006, 15221445985 ],\n    \"samples_ts\": [ 33.5828, 33.6336, 33.6368 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:26:02Z",
+          "avg_ns": 4637255941,
+          "stddev_ns": 74999,
+          "avg_ts": 110.410123,
+          "stddev_ts": 0.001786,
+          "samples_ns": [
+            4637224892,
+            4637341479,
+            4637201452
+          ],
+          "samples_ts": [
+            110.411,
+            110.408,
+            110.411
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:26:21Z",
+          "avg_ns": 15230074317,
+          "stddev_ns": 13732462,
+          "avg_ts": 33.617714,
+          "stddev_ts": 0.030296,
+          "samples_ns": [
+            15245909960,
+            15222867006,
+            15221445985
+          ],
+          "samples_ts": [
+            33.5828,
+            33.6336,
+            33.6368
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 303
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:27:23.709940+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:27:08Z\",\n    \"avg_ns\": 1112574223,\n    \"stddev_ns\": 78652,\n    \"avg_ts\": 115.048504,\n    \"stddev_ts\": 0.007365,\n    \"samples_ns\": [ 1112526602, 1112539958, 1112656110 ],\n    \"samples_ts\": [ 115.053, 115.052, 115.04 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:27:12Z\",\n    \"avg_ns\": 3706006755,\n    \"stddev_ns\": 3079143,\n    \"avg_ts\": 34.538539,\n    \"stddev_ts\": 0.028693,\n    \"samples_ns\": [ 3709226272, 3705703580, 3703090413 ],\n    \"samples_ts\": [ 34.5085, 34.5413, 34.5657 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:27:08Z",
+          "avg_ns": 1112574223,
+          "stddev_ns": 78652,
+          "avg_ts": 115.048504,
+          "stddev_ts": 0.007365,
+          "samples_ns": [
+            1112526602,
+            1112539958,
+            1112656110
+          ],
+          "samples_ts": [
+            115.053,
+            115.052,
+            115.04
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:27:12Z",
+          "avg_ns": 3706006755,
+          "stddev_ns": 3079143,
+          "avg_ts": 34.538539,
+          "stddev_ts": 0.028693,
+          "samples_ns": [
+            3709226272,
+            3705703580,
+            3703090413
+          ],
+          "samples_ts": [
+            34.5085,
+            34.5413,
+            34.5657
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 304
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:28:14.403696+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:27:24Z\",\n    \"avg_ns\": 1111969189,\n    \"stddev_ns\": 119251,\n    \"avg_ts\": 115.111104,\n    \"stddev_ts\": 0.012345,\n    \"samples_ns\": [ 1112002056, 1112068560, 1111836951 ],\n    \"samples_ts\": [ 115.108, 115.101, 115.125 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:27:28Z\",\n    \"avg_ns\": 15112326543,\n    \"stddev_ns\": 6862407,\n    \"avg_ts\": 33.879633,\n    \"stddev_ts\": 0.015389,\n    \"samples_ns\": [ 15104404167, 15116147726, 15116427736 ],\n    \"samples_ts\": [ 33.8974, 33.8711, 33.8704 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:27:24Z",
+          "avg_ns": 1111969189,
+          "stddev_ns": 119251,
+          "avg_ts": 115.111104,
+          "stddev_ts": 0.012345,
+          "samples_ns": [
+            1112002056,
+            1112068560,
+            1111836951
+          ],
+          "samples_ts": [
+            115.108,
+            115.101,
+            115.125
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:27:28Z",
+          "avg_ns": 15112326543,
+          "stddev_ns": 6862407,
+          "avg_ts": 33.879633,
+          "stddev_ts": 0.015389,
+          "samples_ns": [
+            15104404167,
+            15116147726,
+            15116427736
+          ],
+          "samples_ts": [
+            33.8974,
+            33.8711,
+            33.8704
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 305
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:28:45.045158+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:28:15Z\",\n    \"avg_ns\": 4689141948,\n    \"stddev_ns\": 325040,\n    \"avg_ts\": 109.188420,\n    \"stddev_ts\": 0.007399,\n    \"samples_ns\": [ 4689097531, 4688848749, 4689479565 ],\n    \"samples_ts\": [ 109.189, 109.195, 109.181 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:28:33Z\",\n    \"avg_ns\": 3672672987,\n    \"stddev_ns\": 741183,\n    \"avg_ts\": 34.852001,\n    \"stddev_ts\": 0.006986,\n    \"samples_ns\": [ 3673385647, 3672718025, 3671915291 ],\n    \"samples_ts\": [ 34.8452, 34.8516, 34.8592 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:28:15Z",
+          "avg_ns": 4689141948,
+          "stddev_ns": 325040,
+          "avg_ts": 109.18842,
+          "stddev_ts": 0.007399,
+          "samples_ns": [
+            4689097531,
+            4688848749,
+            4689479565
+          ],
+          "samples_ts": [
+            109.189,
+            109.195,
+            109.181
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:28:33Z",
+          "avg_ns": 3672672987,
+          "stddev_ns": 741183,
+          "avg_ts": 34.852001,
+          "stddev_ts": 0.006986,
+          "samples_ns": [
+            3673385647,
+            3672718025,
+            3671915291
+          ],
+          "samples_ts": [
+            34.8452,
+            34.8516,
+            34.8592
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 306
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:29:49.930220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:28:45Z\",\n    \"avg_ns\": 4658300511,\n    \"stddev_ns\": 592065,\n    \"avg_ts\": 109.911330,\n    \"stddev_ts\": 0.013877,\n    \"samples_ns\": [ 4657623537, 4658592501, 4658685496 ],\n    \"samples_ts\": [ 109.927, 109.904, 109.902 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:29:04Z\",\n    \"avg_ns\": 15127618181,\n    \"stddev_ns\": 9158718,\n    \"avg_ts\": 33.845389,\n    \"stddev_ts\": 0.020484,\n    \"samples_ns\": [ 15138115769, 15121259155, 15123479619 ],\n    \"samples_ts\": [ 33.8219, 33.8596, 33.8546 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:28:45Z",
+          "avg_ns": 4658300511,
+          "stddev_ns": 592065,
+          "avg_ts": 109.91133,
+          "stddev_ts": 0.013877,
+          "samples_ns": [
+            4657623537,
+            4658592501,
+            4658685496
+          ],
+          "samples_ts": [
+            109.927,
+            109.904,
+            109.902
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:29:04Z",
+          "avg_ns": 15127618181,
+          "stddev_ns": 9158718,
+          "avg_ts": 33.845389,
+          "stddev_ts": 0.020484,
+          "samples_ns": [
+            15138115769,
+            15121259155,
+            15123479619
+          ],
+          "samples_ts": [
+            33.8219,
+            33.8596,
+            33.8546
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 307
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:30:06.384562+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:29:50Z\",\n    \"avg_ns\": 1112100370,\n    \"stddev_ns\": 172271,\n    \"avg_ts\": 115.097527,\n    \"stddev_ts\": 0.017493,\n    \"samples_ns\": [ 1112163554, 1111908868, 1112228689 ],\n    \"samples_ts\": [ 115.091, 115.117, 115.084 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:29:55Z\",\n    \"avg_ns\": 3715132451,\n    \"stddev_ns\": 512835,\n    \"avg_ts\": 34.453685,\n    \"stddev_ts\": 0.004722,\n    \"samples_ns\": [ 3714590567, 3715205767, 3715601020 ],\n    \"samples_ts\": [ 34.4587, 34.453, 34.4493 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:29:50Z",
+          "avg_ns": 1112100370,
+          "stddev_ns": 172271,
+          "avg_ts": 115.097527,
+          "stddev_ts": 0.017493,
+          "samples_ns": [
+            1112163554,
+            1111908868,
+            1112228689
+          ],
+          "samples_ts": [
+            115.091,
+            115.117,
+            115.084
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:29:55Z",
+          "avg_ns": 3715132451,
+          "stddev_ns": 512835,
+          "avg_ts": 34.453685,
+          "stddev_ts": 0.004722,
+          "samples_ns": [
+            3714590567,
+            3715205767,
+            3715601020
+          ],
+          "samples_ts": [
+            34.4587,
+            34.453,
+            34.4493
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 308
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:30:57.240394+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:30:07Z\",\n    \"avg_ns\": 1112370618,\n    \"stddev_ns\": 71388,\n    \"avg_ts\": 115.069562,\n    \"stddev_ts\": 0.005543,\n    \"samples_ns\": [ 1112311758, 1112416576, 1112383522 ],\n    \"samples_ts\": [ 115.076, 115.065, 115.068 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:30:11Z\",\n    \"avg_ns\": 15180096222,\n    \"stddev_ns\": 22661582,\n    \"avg_ts\": 33.728427,\n    \"stddev_ts\": 0.050394,\n    \"samples_ns\": [ 15153949926, 15192269075, 15194069666 ],\n    \"samples_ts\": [ 33.7866, 33.7014, 33.6974 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:30:07Z",
+          "avg_ns": 1112370618,
+          "stddev_ns": 71388,
+          "avg_ts": 115.069562,
+          "stddev_ts": 0.005543,
+          "samples_ns": [
+            1112311758,
+            1112416576,
+            1112383522
+          ],
+          "samples_ts": [
+            115.076,
+            115.065,
+            115.068
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:30:11Z",
+          "avg_ns": 15180096222,
+          "stddev_ns": 22661582,
+          "avg_ts": 33.728427,
+          "stddev_ts": 0.050394,
+          "samples_ns": [
+            15153949926,
+            15192269075,
+            15194069666
+          ],
+          "samples_ts": [
+            33.7866,
+            33.7014,
+            33.6974
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 309
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:31:28.829531+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:30:58Z\",\n    \"avg_ns\": 4929165791,\n    \"stddev_ns\": 320718,\n    \"avg_ts\": 103.871532,\n    \"stddev_ts\": 0.006594,\n    \"samples_ns\": [ 4929512794, 4928904981, 4929079599 ],\n    \"samples_ts\": [ 103.864, 103.877, 103.873 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:31:17Z\",\n    \"avg_ns\": 3660591877,\n    \"stddev_ns\": 987515,\n    \"avg_ts\": 34.967025,\n    \"stddev_ts\": 0.009414,\n    \"samples_ns\": [ 3661672758, 3660360141, 3659742733 ],\n    \"samples_ts\": [ 34.9567, 34.9692, 34.9751 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:30:58Z",
+          "avg_ns": 4929165791,
+          "stddev_ns": 320718,
+          "avg_ts": 103.871532,
+          "stddev_ts": 0.006594,
+          "samples_ns": [
+            4929512794,
+            4928904981,
+            4929079599
+          ],
+          "samples_ts": [
+            103.864,
+            103.877,
+            103.873
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:31:17Z",
+          "avg_ns": 3660591877,
+          "stddev_ns": 987515,
+          "avg_ts": 34.967025,
+          "stddev_ts": 0.009414,
+          "samples_ns": [
+            3661672758,
+            3660360141,
+            3659742733
+          ],
+          "samples_ts": [
+            34.9567,
+            34.9692,
+            34.9751
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 310
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:32:35.101215+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:31:29Z\",\n    \"avg_ns\": 4946311521,\n    \"stddev_ns\": 623956,\n    \"avg_ts\": 103.511476,\n    \"stddev_ts\": 0.013057,\n    \"samples_ns\": [ 4946177566, 4946991576, 4945765421 ],\n    \"samples_ts\": [ 103.514, 103.497, 103.523 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:31:49Z\",\n    \"avg_ns\": 15203698717,\n    \"stddev_ns\": 51627890,\n    \"avg_ts\": 33.676274,\n    \"stddev_ts\": 0.114132,\n    \"samples_ns\": [ 15263306328, 15173105321, 15174684503 ],\n    \"samples_ts\": [ 33.5445, 33.7439, 33.7404 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:31:29Z",
+          "avg_ns": 4946311521,
+          "stddev_ns": 623956,
+          "avg_ts": 103.511476,
+          "stddev_ts": 0.013057,
+          "samples_ns": [
+            4946177566,
+            4946991576,
+            4945765421
+          ],
+          "samples_ts": [
+            103.514,
+            103.497,
+            103.523
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:31:49Z",
+          "avg_ns": 15203698717,
+          "stddev_ns": 51627890,
+          "avg_ts": 33.676274,
+          "stddev_ts": 0.114132,
+          "samples_ns": [
+            15263306328,
+            15173105321,
+            15174684503
+          ],
+          "samples_ts": [
+            33.5445,
+            33.7439,
+            33.7404
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 311
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:32:51.368566+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:32:35Z\",\n    \"avg_ns\": 1112107771,\n    \"stddev_ns\": 37408,\n    \"avg_ts\": 115.096759,\n    \"stddev_ts\": 0.003872,\n    \"samples_ns\": [ 1112065911, 1112119469, 1112137933 ],\n    \"samples_ts\": [ 115.101, 115.096, 115.094 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:32:40Z\",\n    \"avg_ns\": 3641813770,\n    \"stddev_ns\": 393708,\n    \"avg_ts\": 35.147322,\n    \"stddev_ts\": 0.003755,\n    \"samples_ns\": [ 3642246100, 3641703356, 3641491855 ],\n    \"samples_ts\": [ 35.1431, 35.1484, 35.1504 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:32:35Z",
+          "avg_ns": 1112107771,
+          "stddev_ns": 37408,
+          "avg_ts": 115.096759,
+          "stddev_ts": 0.003872,
+          "samples_ns": [
+            1112065911,
+            1112119469,
+            1112137933
+          ],
+          "samples_ts": [
+            115.101,
+            115.096,
+            115.094
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:32:40Z",
+          "avg_ns": 3641813770,
+          "stddev_ns": 393708,
+          "avg_ts": 35.147322,
+          "stddev_ts": 0.003755,
+          "samples_ns": [
+            3642246100,
+            3641703356,
+            3641491855
+          ],
+          "samples_ts": [
+            35.1431,
+            35.1484,
+            35.1504
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 312
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:33:42.362476+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:32:52Z\",\n    \"avg_ns\": 1112386484,\n    \"stddev_ns\": 64178,\n    \"avg_ts\": 115.067921,\n    \"stddev_ts\": 0.005672,\n    \"samples_ns\": [ 1112445107, 1112336459, 1112377887 ],\n    \"samples_ts\": [ 115.062, 115.073, 115.069 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:32:56Z\",\n    \"avg_ns\": 15224750527,\n    \"stddev_ns\": 47592730,\n    \"avg_ts\": 33.629670,\n    \"stddev_ts\": 0.104938,\n    \"samples_ns\": [ 15279668557, 15195542139, 15199040886 ],\n    \"samples_ts\": [ 33.5086, 33.6941, 33.6863 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:32:52Z",
+          "avg_ns": 1112386484,
+          "stddev_ns": 64178,
+          "avg_ts": 115.067921,
+          "stddev_ts": 0.005672,
+          "samples_ns": [
+            1112445107,
+            1112336459,
+            1112377887
+          ],
+          "samples_ts": [
+            115.062,
+            115.073,
+            115.069
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:32:56Z",
+          "avg_ns": 15224750527,
+          "stddev_ns": 47592730,
+          "avg_ts": 33.62967,
+          "stddev_ts": 0.104938,
+          "samples_ns": [
+            15279668557,
+            15195542139,
+            15199040886
+          ],
+          "samples_ts": [
+            33.5086,
+            33.6941,
+            33.6863
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 313
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:34:12.854259+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:33:43Z\",\n    \"avg_ns\": 4636943869,\n    \"stddev_ns\": 89453,\n    \"avg_ts\": 110.417554,\n    \"stddev_ts\": 0.002130,\n    \"samples_ns\": [ 4636958724, 4636847918, 4637024965 ],\n    \"samples_ts\": [ 110.417, 110.42, 110.416 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:34:01Z\",\n    \"avg_ns\": 3681843010,\n    \"stddev_ns\": 250044,\n    \"avg_ts\": 34.765198,\n    \"stddev_ts\": 0.002290,\n    \"samples_ns\": [ 3681896735, 3682054215, 3681578081 ],\n    \"samples_ts\": [ 34.7647, 34.7632, 34.7677 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:33:43Z",
+          "avg_ns": 4636943869,
+          "stddev_ns": 89453,
+          "avg_ts": 110.417554,
+          "stddev_ts": 0.00213,
+          "samples_ns": [
+            4636958724,
+            4636847918,
+            4637024965
+          ],
+          "samples_ts": [
+            110.417,
+            110.42,
+            110.416
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:34:01Z",
+          "avg_ns": 3681843010,
+          "stddev_ns": 250044,
+          "avg_ts": 34.765198,
+          "stddev_ts": 0.00229,
+          "samples_ns": [
+            3681896735,
+            3682054215,
+            3681578081
+          ],
+          "samples_ts": [
+            34.7647,
+            34.7632,
+            34.7677
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 314
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:35:17.997683+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:34:13Z\",\n    \"avg_ns\": 4651791912,\n    \"stddev_ns\": 145416,\n    \"avg_ts\": 110.065112,\n    \"stddev_ts\": 0.002575,\n    \"samples_ns\": [ 4651680947, 4651898457, 4651796334 ],\n    \"samples_ts\": [ 110.068, 110.063, 110.065 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:34:32Z\",\n    \"avg_ns\": 15223593996,\n    \"stddev_ns\": 39495899,\n    \"avg_ts\": 33.632157,\n    \"stddev_ts\": 0.087384,\n    \"samples_ns\": [ 15178034981, 15244586628, 15248160380 ],\n    \"samples_ts\": [ 33.733, 33.5857, 33.5778 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:34:13Z",
+          "avg_ns": 4651791912,
+          "stddev_ns": 145416,
+          "avg_ts": 110.065112,
+          "stddev_ts": 0.002575,
+          "samples_ns": [
+            4651680947,
+            4651898457,
+            4651796334
+          ],
+          "samples_ts": [
+            110.068,
+            110.063,
+            110.065
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:34:32Z",
+          "avg_ns": 15223593996,
+          "stddev_ns": 39495899,
+          "avg_ts": 33.632157,
+          "stddev_ts": 0.087384,
+          "samples_ns": [
+            15178034981,
+            15244586628,
+            15248160380
+          ],
+          "samples_ts": [
+            33.733,
+            33.5857,
+            33.5778
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 315
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:35:34.311766+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:35:18Z\",\n    \"avg_ns\": 1112445077,\n    \"stddev_ns\": 102762,\n    \"avg_ts\": 115.061861,\n    \"stddev_ts\": 0.010629,\n    \"samples_ns\": [ 1112326587, 1112498831, 1112509813 ],\n    \"samples_ts\": [ 115.074, 115.056, 115.055 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:35:23Z\",\n    \"avg_ns\": 3650341210,\n    \"stddev_ns\": 702512,\n    \"avg_ts\": 35.065216,\n    \"stddev_ts\": 0.006699,\n    \"samples_ns\": [ 3650916510, 3650541410, 3649565712 ],\n    \"samples_ts\": [ 35.0597, 35.0633, 35.0727 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:35:18Z",
+          "avg_ns": 1112445077,
+          "stddev_ns": 102762,
+          "avg_ts": 115.061861,
+          "stddev_ts": 0.010629,
+          "samples_ns": [
+            1112326587,
+            1112498831,
+            1112509813
+          ],
+          "samples_ts": [
+            115.074,
+            115.056,
+            115.055
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:35:23Z",
+          "avg_ns": 3650341210,
+          "stddev_ns": 702512,
+          "avg_ts": 35.065216,
+          "stddev_ts": 0.006699,
+          "samples_ns": [
+            3650916510,
+            3650541410,
+            3649565712
+          ],
+          "samples_ts": [
+            35.0597,
+            35.0633,
+            35.0727
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 316
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:36:24.977345+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:35:35Z\",\n    \"avg_ns\": 1111617126,\n    \"stddev_ns\": 52373,\n    \"avg_ts\": 115.147560,\n    \"stddev_ts\": 0.005425,\n    \"samples_ns\": [ 1111677600, 1111587231, 1111586547 ],\n    \"samples_ts\": [ 115.141, 115.151, 115.151 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:35:39Z\",\n    \"avg_ns\": 15117515271,\n    \"stddev_ns\": 810239,\n    \"avg_ts\": 33.868000,\n    \"stddev_ts\": 0.001794,\n    \"samples_ns\": [ 15118217321, 15117685510, 15116642983 ],\n    \"samples_ts\": [ 33.8664, 33.8676, 33.87 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:35:35Z",
+          "avg_ns": 1111617126,
+          "stddev_ns": 52373,
+          "avg_ts": 115.14756,
+          "stddev_ts": 0.005425,
+          "samples_ns": [
+            1111677600,
+            1111587231,
+            1111586547
+          ],
+          "samples_ts": [
+            115.141,
+            115.151,
+            115.151
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:35:39Z",
+          "avg_ns": 15117515271,
+          "stddev_ns": 810239,
+          "avg_ts": 33.868,
+          "stddev_ts": 0.001794,
+          "samples_ns": [
+            15118217321,
+            15117685510,
+            15116642983
+          ],
+          "samples_ts": [
+            33.8664,
+            33.8676,
+            33.87
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 317
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:36:55.659610+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:36:25Z\",\n    \"avg_ns\": 4684162241,\n    \"stddev_ns\": 297514,\n    \"avg_ts\": 109.304498,\n    \"stddev_ts\": 0.006942,\n    \"samples_ns\": [ 4684070634, 4684494787, 4683921302 ],\n    \"samples_ts\": [ 109.307, 109.297, 109.31 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:36:44Z\",\n    \"avg_ns\": 3684840296,\n    \"stddev_ns\": 479127,\n    \"avg_ts\": 34.736920,\n    \"stddev_ts\": 0.004481,\n    \"samples_ns\": [ 3685177534, 3685046619, 3684296736 ],\n    \"samples_ts\": [ 34.7337, 34.735, 34.742 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:36:25Z",
+          "avg_ns": 4684162241,
+          "stddev_ns": 297514,
+          "avg_ts": 109.304498,
+          "stddev_ts": 0.006942,
+          "samples_ns": [
+            4684070634,
+            4684494787,
+            4683921302
+          ],
+          "samples_ts": [
+            109.307,
+            109.297,
+            109.31
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:36:44Z",
+          "avg_ns": 3684840296,
+          "stddev_ns": 479127,
+          "avg_ts": 34.73692,
+          "stddev_ts": 0.004481,
+          "samples_ns": [
+            3685177534,
+            3685046619,
+            3684296736
+          ],
+          "samples_ts": [
+            34.7337,
+            34.735,
+            34.742
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 318
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:38:00.720673+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:36:56Z\",\n    \"avg_ns\": 4650866306,\n    \"stddev_ns\": 497631,\n    \"avg_ts\": 110.087018,\n    \"stddev_ts\": 0.011779,\n    \"samples_ns\": [ 4651395656, 4650795225, 4650408037 ],\n    \"samples_ts\": [ 110.074, 110.089, 110.098 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:37:15Z\",\n    \"avg_ns\": 15191805760,\n    \"stddev_ns\": 17227477,\n    \"avg_ts\": 33.702408,\n    \"stddev_ts\": 0.038244,\n    \"samples_ns\": [ 15171913285, 15201807395, 15201696600 ],\n    \"samples_ts\": [ 33.7466, 33.6802, 33.6805 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:36:56Z",
+          "avg_ns": 4650866306,
+          "stddev_ns": 497631,
+          "avg_ts": 110.087018,
+          "stddev_ts": 0.011779,
+          "samples_ns": [
+            4651395656,
+            4650795225,
+            4650408037
+          ],
+          "samples_ts": [
+            110.074,
+            110.089,
+            110.098
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:37:15Z",
+          "avg_ns": 15191805760,
+          "stddev_ns": 17227477,
+          "avg_ts": 33.702408,
+          "stddev_ts": 0.038244,
+          "samples_ns": [
+            15171913285,
+            15201807395,
+            15201696600
+          ],
+          "samples_ts": [
+            33.7466,
+            33.6802,
+            33.6805
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 319
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:38:17.024618+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:38:01Z\",\n    \"avg_ns\": 1111818892,\n    \"stddev_ns\": 65727,\n    \"avg_ts\": 115.126664,\n    \"stddev_ts\": 0.005865,\n    \"samples_ns\": [ 1111834565, 1111866047, 1111756065 ],\n    \"samples_ts\": [ 115.125, 115.122, 115.133 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:38:05Z\",\n    \"avg_ns\": 3639488176,\n    \"stddev_ns\": 1548240,\n    \"avg_ts\": 35.169785,\n    \"stddev_ts\": 0.014937,\n    \"samples_ns\": [ 3641142008, 3639242983, 3638079539 ],\n    \"samples_ts\": [ 35.1538, 35.1721, 35.1834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:38:01Z",
+          "avg_ns": 1111818892,
+          "stddev_ns": 65727,
+          "avg_ts": 115.126664,
+          "stddev_ts": 0.005865,
+          "samples_ns": [
+            1111834565,
+            1111866047,
+            1111756065
+          ],
+          "samples_ts": [
+            115.125,
+            115.122,
+            115.133
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:38:05Z",
+          "avg_ns": 3639488176,
+          "stddev_ns": 1548240,
+          "avg_ts": 35.169785,
+          "stddev_ts": 0.014937,
+          "samples_ns": [
+            3641142008,
+            3639242983,
+            3638079539
+          ],
+          "samples_ts": [
+            35.1538,
+            35.1721,
+            35.1834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 320
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:39:08.070359+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:38:17Z\",\n    \"avg_ns\": 1111722072,\n    \"stddev_ns\": 84390,\n    \"avg_ts\": 115.136691,\n    \"stddev_ts\": 0.007248,\n    \"samples_ns\": [ 1111650726, 1111724875, 1111790617 ],\n    \"samples_ts\": [ 115.144, 115.136, 115.13 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:38:22Z\",\n    \"avg_ns\": 15244130695,\n    \"stddev_ns\": 19558784,\n    \"avg_ts\": 33.586734,\n    \"stddev_ts\": 0.043059,\n    \"samples_ns\": [ 15266712471, 15232588520, 15233091096 ],\n    \"samples_ts\": [ 33.537, 33.6121, 33.611 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:38:17Z",
+          "avg_ns": 1111722072,
+          "stddev_ns": 84390,
+          "avg_ts": 115.136691,
+          "stddev_ts": 0.007248,
+          "samples_ns": [
+            1111650726,
+            1111724875,
+            1111790617
+          ],
+          "samples_ts": [
+            115.144,
+            115.136,
+            115.13
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:38:22Z",
+          "avg_ns": 15244130695,
+          "stddev_ns": 19558784,
+          "avg_ts": 33.586734,
+          "stddev_ts": 0.043059,
+          "samples_ns": [
+            15266712471,
+            15232588520,
+            15233091096
+          ],
+          "samples_ts": [
+            33.537,
+            33.6121,
+            33.611
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 321
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:39:39.761351+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:39:08Z\",\n    \"avg_ns\": 4931410306,\n    \"stddev_ns\": 440413,\n    \"avg_ts\": 103.824255,\n    \"stddev_ts\": 0.009154,\n    \"samples_ns\": [ 4930908267, 4931660790, 4931661862 ],\n    \"samples_ts\": [ 103.835, 103.819, 103.819 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:39:28Z\",\n    \"avg_ns\": 3689737040,\n    \"stddev_ns\": 1379575,\n    \"avg_ts\": 34.690822,\n    \"stddev_ts\": 0.012959,\n    \"samples_ns\": [ 3690974184, 3689985444, 3688251493 ],\n    \"samples_ts\": [ 34.6792, 34.6885, 34.7048 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:39:08Z",
+          "avg_ns": 4931410306,
+          "stddev_ns": 440413,
+          "avg_ts": 103.824255,
+          "stddev_ts": 0.009154,
+          "samples_ns": [
+            4930908267,
+            4931660790,
+            4931661862
+          ],
+          "samples_ts": [
+            103.835,
+            103.819,
+            103.819
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:39:28Z",
+          "avg_ns": 3689737040,
+          "stddev_ns": 1379575,
+          "avg_ts": 34.690822,
+          "stddev_ts": 0.012959,
+          "samples_ns": [
+            3690974184,
+            3689985444,
+            3688251493
+          ],
+          "samples_ts": [
+            34.6792,
+            34.6885,
+            34.7048
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 322
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:40:45.797929+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:39:40Z\",\n    \"avg_ns\": 4955884296,\n    \"stddev_ns\": 534416,\n    \"avg_ts\": 103.311533,\n    \"stddev_ts\": 0.011140,\n    \"samples_ns\": [ 4955788934, 4956459974, 4955403980 ],\n    \"samples_ts\": [ 103.314, 103.3, 103.322 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:40:00Z\",\n    \"avg_ns\": 15110937496,\n    \"stddev_ns\": 14464535,\n    \"avg_ts\": 33.882763,\n    \"stddev_ts\": 0.032414,\n    \"samples_ns\": [ 15127637724, 15102401684, 15102773081 ],\n    \"samples_ts\": [ 33.8453, 33.9019, 33.9011 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:39:40Z",
+          "avg_ns": 4955884296,
+          "stddev_ns": 534416,
+          "avg_ts": 103.311533,
+          "stddev_ts": 0.01114,
+          "samples_ns": [
+            4955788934,
+            4956459974,
+            4955403980
+          ],
+          "samples_ts": [
+            103.314,
+            103.3,
+            103.322
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:40:00Z",
+          "avg_ns": 15110937496,
+          "stddev_ns": 14464535,
+          "avg_ts": 33.882763,
+          "stddev_ts": 0.032414,
+          "samples_ns": [
+            15127637724,
+            15102401684,
+            15102773081
+          ],
+          "samples_ts": [
+            33.8453,
+            33.9019,
+            33.9011
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 323
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:40:57.991481+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:40:46Z\",\n    \"avg_ns\": 586574549,\n    \"stddev_ns\": 363258,\n    \"avg_ts\": 218.216137,\n    \"stddev_ts\": 0.134524,\n    \"samples_ns\": [ 586535994, 586953923, 586233732 ],\n    \"samples_ts\": [ 218.23, 218.075, 218.343 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:40:48Z\",\n    \"avg_ns\": 2986188443,\n    \"stddev_ns\": 349590,\n    \"avg_ts\": 42.864007,\n    \"stddev_ts\": 0.004956,\n    \"samples_ns\": [ 2985843299, 2986533885, 2986188146 ],\n    \"samples_ts\": [ 42.869, 42.859, 42.864 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:40:46Z",
+          "avg_ns": 586574549,
+          "stddev_ns": 363258,
+          "avg_ts": 218.216137,
+          "stddev_ts": 0.134524,
+          "samples_ns": [
+            586535994,
+            586953923,
+            586233732
+          ],
+          "samples_ts": [
+            218.23,
+            218.075,
+            218.343
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:40:48Z",
+          "avg_ns": 2986188443,
+          "stddev_ns": 349590,
+          "avg_ts": 42.864007,
+          "stddev_ts": 0.004956,
+          "samples_ns": [
+            2985843299,
+            2986533885,
+            2986188146
+          ],
+          "samples_ts": [
+            42.869,
+            42.859,
+            42.864
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 324
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:41:38.005093+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:40:58Z\",\n    \"avg_ns\": 586261718,\n    \"stddev_ns\": 206341,\n    \"avg_ts\": 218.332541,\n    \"stddev_ts\": 0.076856,\n    \"samples_ns\": [ 586031753, 586430682, 586322719 ],\n    \"samples_ts\": [ 218.418, 218.27, 218.31 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:41:01Z\",\n    \"avg_ns\": 12268017453,\n    \"stddev_ns\": 10676933,\n    \"avg_ts\": 41.734556,\n    \"stddev_ts\": 0.036301,\n    \"samples_ns\": [ 12280217823, 12260388994, 12263445544 ],\n    \"samples_ts\": [ 41.6931, 41.7605, 41.7501 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:40:58Z",
+          "avg_ns": 586261718,
+          "stddev_ns": 206341,
+          "avg_ts": 218.332541,
+          "stddev_ts": 0.076856,
+          "samples_ns": [
+            586031753,
+            586430682,
+            586322719
+          ],
+          "samples_ts": [
+            218.418,
+            218.27,
+            218.31
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:41:01Z",
+          "avg_ns": 12268017453,
+          "stddev_ns": 10676933,
+          "avg_ts": 41.734556,
+          "stddev_ts": 0.036301,
+          "samples_ns": [
+            12280217823,
+            12260388994,
+            12263445544
+          ],
+          "samples_ts": [
+            41.6931,
+            41.7605,
+            41.7501
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 325
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:41:57.673367+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:41:38Z\",\n    \"avg_ns\": 2439993736,\n    \"stddev_ns\": 2030331,\n    \"avg_ts\": 209.836701,\n    \"stddev_ts\": 0.174572,\n    \"samples_ns\": [ 2442167943, 2439666147, 2438147118 ],\n    \"samples_ts\": [ 209.65, 209.865, 209.996 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:41:48Z\",\n    \"avg_ns\": 3002439838,\n    \"stddev_ns\": 819677,\n    \"avg_ts\": 42.631997,\n    \"stddev_ts\": 0.011637,\n    \"samples_ns\": [ 3003371064, 3001827647, 3002120803 ],\n    \"samples_ts\": [ 42.6188, 42.6407, 42.6365 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:41:38Z",
+          "avg_ns": 2439993736,
+          "stddev_ns": 2030331,
+          "avg_ts": 209.836701,
+          "stddev_ts": 0.174572,
+          "samples_ns": [
+            2442167943,
+            2439666147,
+            2438147118
+          ],
+          "samples_ts": [
+            209.65,
+            209.865,
+            209.996
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:41:48Z",
+          "avg_ns": 3002439838,
+          "stddev_ns": 819677,
+          "avg_ts": 42.631997,
+          "stddev_ts": 0.011637,
+          "samples_ns": [
+            3003371064,
+            3001827647,
+            3002120803
+          ],
+          "samples_ts": [
+            42.6188,
+            42.6407,
+            42.6365
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 326
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:42:44.961791+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:41:58Z\",\n    \"avg_ns\": 2439221130,\n    \"stddev_ns\": 311194,\n    \"avg_ts\": 209.903071,\n    \"stddev_ts\": 0.026779,\n    \"samples_ns\": [ 2439182719, 2438930924, 2439549747 ],\n    \"samples_ts\": [ 209.906, 209.928, 209.875 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:42:08Z\",\n    \"avg_ns\": 12222942810,\n    \"stddev_ns\": 2896972,\n    \"avg_ts\": 41.888441,\n    \"stddev_ts\": 0.009922,\n    \"samples_ns\": [ 12219600761, 12224670875, 12224556795 ],\n    \"samples_ts\": [ 41.8999, 41.8825, 41.8829 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:41:58Z",
+          "avg_ns": 2439221130,
+          "stddev_ns": 311194,
+          "avg_ts": 209.903071,
+          "stddev_ts": 0.026779,
+          "samples_ns": [
+            2439182719,
+            2438930924,
+            2439549747
+          ],
+          "samples_ts": [
+            209.906,
+            209.928,
+            209.875
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:42:08Z",
+          "avg_ns": 12222942810,
+          "stddev_ns": 2896972,
+          "avg_ts": 41.888441,
+          "stddev_ts": 0.009922,
+          "samples_ns": [
+            12219600761,
+            12224670875,
+            12224556795
+          ],
+          "samples_ts": [
+            41.8999,
+            41.8825,
+            41.8829
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 327
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:42:57.182706+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:42:45Z\",\n    \"avg_ns\": 586243447,\n    \"stddev_ns\": 127294,\n    \"avg_ts\": 218.339334,\n    \"stddev_ts\": 0.047410,\n    \"samples_ns\": [ 586114267, 586368769, 586247305 ],\n    \"samples_ts\": [ 218.387, 218.293, 218.338 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:42:48Z\",\n    \"avg_ns\": 2994040998,\n    \"stddev_ns\": 1936859,\n    \"avg_ts\": 42.751598,\n    \"stddev_ts\": 0.027667,\n    \"samples_ns\": [ 2995181115, 2995137230, 2991804649 ],\n    \"samples_ts\": [ 42.7353, 42.7359, 42.7835 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:42:45Z",
+          "avg_ns": 586243447,
+          "stddev_ns": 127294,
+          "avg_ts": 218.339334,
+          "stddev_ts": 0.04741,
+          "samples_ns": [
+            586114267,
+            586368769,
+            586247305
+          ],
+          "samples_ts": [
+            218.387,
+            218.293,
+            218.338
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:42:48Z",
+          "avg_ns": 2994040998,
+          "stddev_ns": 1936859,
+          "avg_ts": 42.751598,
+          "stddev_ts": 0.027667,
+          "samples_ns": [
+            2995181115,
+            2995137230,
+            2991804649
+          ],
+          "samples_ts": [
+            42.7353,
+            42.7359,
+            42.7835
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 328
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:43:37.071967+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:42:57Z\",\n    \"avg_ns\": 585331078,\n    \"stddev_ns\": 164631,\n    \"avg_ts\": 218.679670,\n    \"stddev_ts\": 0.061500,\n    \"samples_ns\": [ 585285262, 585194207, 585513765 ],\n    \"samples_ts\": [ 218.697, 218.731, 218.611 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:43:00Z\",\n    \"avg_ns\": 12225516221,\n    \"stddev_ns\": 18763613,\n    \"avg_ts\": 41.879688,\n    \"stddev_ts\": 0.064327,\n    \"samples_ns\": [ 12233496097, 12238970542, 12204082025 ],\n    \"samples_ts\": [ 41.8523, 41.8336, 41.9532 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:42:57Z",
+          "avg_ns": 585331078,
+          "stddev_ns": 164631,
+          "avg_ts": 218.67967,
+          "stddev_ts": 0.0615,
+          "samples_ns": [
+            585285262,
+            585194207,
+            585513765
+          ],
+          "samples_ts": [
+            218.697,
+            218.731,
+            218.611
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:43:00Z",
+          "avg_ns": 12225516221,
+          "stddev_ns": 18763613,
+          "avg_ts": 41.879688,
+          "stddev_ts": 0.064327,
+          "samples_ns": [
+            12233496097,
+            12238970542,
+            12204082025
+          ],
+          "samples_ts": [
+            41.8523,
+            41.8336,
+            41.9532
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 329
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:43:56.913986+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:43:37Z\",\n    \"avg_ns\": 2488648139,\n    \"stddev_ns\": 16804119,\n    \"avg_ts\": 205.740418,\n    \"stddev_ts\": 1.384297,\n    \"samples_ns\": [ 2507863512, 2481376157, 2476704748 ],\n    \"samples_ts\": [ 204.158, 206.337, 206.726 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:43:47Z\",\n    \"avg_ns\": 2989486601,\n    \"stddev_ns\": 7572963,\n    \"avg_ts\": 42.816900,\n    \"stddev_ts\": 0.108565,\n    \"samples_ns\": [ 2981097291, 2991545514, 2995816999 ],\n    \"samples_ts\": [ 42.9372, 42.7872, 42.7262 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:43:37Z",
+          "avg_ns": 2488648139,
+          "stddev_ns": 16804119,
+          "avg_ts": 205.740418,
+          "stddev_ts": 1.384297,
+          "samples_ns": [
+            2507863512,
+            2481376157,
+            2476704748
+          ],
+          "samples_ts": [
+            204.158,
+            206.337,
+            206.726
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:43:47Z",
+          "avg_ns": 2989486601,
+          "stddev_ns": 7572963,
+          "avg_ts": 42.8169,
+          "stddev_ts": 0.108565,
+          "samples_ns": [
+            2981097291,
+            2991545514,
+            2995816999
+          ],
+          "samples_ts": [
+            42.9372,
+            42.7872,
+            42.7262
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 330
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:44:43.892533+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:43:57Z\",\n    \"avg_ns\": 2425645219,\n    \"stddev_ns\": 205179,\n    \"avg_ts\": 211.077860,\n    \"stddev_ts\": 0.017332,\n    \"samples_ns\": [ 2425584692, 2425483323, 2425867643 ],\n    \"samples_ts\": [ 211.083, 211.092, 211.059 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:44:07Z\",\n    \"avg_ns\": 12129608203,\n    \"stddev_ns\": 1536761,\n    \"avg_ts\": 42.210762,\n    \"stddev_ts\": 0.005321,\n    \"samples_ns\": [ 12130744190, 12127869942, 12130210479 ],\n    \"samples_ts\": [ 42.2068, 42.2168, 42.2087 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:43:57Z",
+          "avg_ns": 2425645219,
+          "stddev_ns": 205179,
+          "avg_ts": 211.07786,
+          "stddev_ts": 0.017332,
+          "samples_ns": [
+            2425584692,
+            2425483323,
+            2425867643
+          ],
+          "samples_ts": [
+            211.083,
+            211.092,
+            211.059
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:44:07Z",
+          "avg_ns": 12129608203,
+          "stddev_ns": 1536761,
+          "avg_ts": 42.210762,
+          "stddev_ts": 0.005321,
+          "samples_ns": [
+            12130744190,
+            12127869942,
+            12130210479
+          ],
+          "samples_ts": [
+            42.2068,
+            42.2168,
+            42.2087
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 331
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:44:56.042237+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:44:44Z\",\n    \"avg_ns\": 580105206,\n    \"stddev_ns\": 127733,\n    \"avg_ts\": 220.649639,\n    \"stddev_ts\": 0.048591,\n    \"samples_ns\": [ 580167873, 580189503, 579958242 ],\n    \"samples_ts\": [ 220.626, 220.618, 220.706 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:44:47Z\",\n    \"avg_ns\": 2978363818,\n    \"stddev_ns\": 199792,\n    \"avg_ts\": 42.976617,\n    \"stddev_ts\": 0.002659,\n    \"samples_ns\": [ 2978515539, 2978417171, 2978158746 ],\n    \"samples_ts\": [ 42.9744, 42.9758, 42.9796 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:44:44Z",
+          "avg_ns": 580105206,
+          "stddev_ns": 127733,
+          "avg_ts": 220.649639,
+          "stddev_ts": 0.048591,
+          "samples_ns": [
+            580167873,
+            580189503,
+            579958242
+          ],
+          "samples_ts": [
+            220.626,
+            220.618,
+            220.706
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:44:47Z",
+          "avg_ns": 2978363818,
+          "stddev_ns": 199792,
+          "avg_ts": 42.976617,
+          "stddev_ts": 0.002659,
+          "samples_ns": [
+            2978515539,
+            2978417171,
+            2978158746
+          ],
+          "samples_ts": [
+            42.9744,
+            42.9758,
+            42.9796
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 332
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:45:36.330774+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:44:56Z\",\n    \"avg_ns\": 587596599,\n    \"stddev_ns\": 1430537,\n    \"avg_ts\": 217.837382,\n    \"stddev_ts\": 0.529900,\n    \"samples_ns\": [ 586287076, 587380085, 589122638 ],\n    \"samples_ts\": [ 218.323, 217.917, 217.272 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:44:59Z\",\n    \"avg_ns\": 12342743218,\n    \"stddev_ns\": 2588591,\n    \"avg_ts\": 41.481866,\n    \"stddev_ts\": 0.008692,\n    \"samples_ns\": [ 12343342258, 12339910059, 12344977338 ],\n    \"samples_ts\": [ 41.4799, 41.4914, 41.4744 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:44:56Z",
+          "avg_ns": 587596599,
+          "stddev_ns": 1430537,
+          "avg_ts": 217.837382,
+          "stddev_ts": 0.5299,
+          "samples_ns": [
+            586287076,
+            587380085,
+            589122638
+          ],
+          "samples_ts": [
+            218.323,
+            217.917,
+            217.272
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:44:59Z",
+          "avg_ns": 12342743218,
+          "stddev_ns": 2588591,
+          "avg_ts": 41.481866,
+          "stddev_ts": 0.008692,
+          "samples_ns": [
+            12343342258,
+            12339910059,
+            12344977338
+          ],
+          "samples_ts": [
+            41.4799,
+            41.4914,
+            41.4744
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 333
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:45:56.469966+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:45:37Z\",\n    \"avg_ns\": 2584763660,\n    \"stddev_ns\": 854087,\n    \"avg_ts\": 198.083889,\n    \"stddev_ts\": 0.065348,\n    \"samples_ns\": [ 2583790798, 2585380610, 2585119573 ],\n    \"samples_ts\": [ 198.158, 198.037, 198.057 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:45:47Z\",\n    \"avg_ns\": 2967353437,\n    \"stddev_ns\": 1098261,\n    \"avg_ts\": 43.136086,\n    \"stddev_ts\": 0.015968,\n    \"samples_ns\": [ 2966096972, 2967832843, 2968130496 ],\n    \"samples_ts\": [ 43.1544, 43.1291, 43.1248 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:45:37Z",
+          "avg_ns": 2584763660,
+          "stddev_ns": 854087,
+          "avg_ts": 198.083889,
+          "stddev_ts": 0.065348,
+          "samples_ns": [
+            2583790798,
+            2585380610,
+            2585119573
+          ],
+          "samples_ts": [
+            198.158,
+            198.037,
+            198.057
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:45:47Z",
+          "avg_ns": 2967353437,
+          "stddev_ns": 1098261,
+          "avg_ts": 43.136086,
+          "stddev_ts": 0.015968,
+          "samples_ns": [
+            2966096972,
+            2967832843,
+            2968130496
+          ],
+          "samples_ts": [
+            43.1544,
+            43.1291,
+            43.1248
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 334
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:46:44.021004+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:45:57Z\",\n    \"avg_ns\": 2576471654,\n    \"stddev_ns\": 240794,\n    \"avg_ts\": 198.721380,\n    \"stddev_ts\": 0.018571,\n    \"samples_ns\": [ 2576749698, 2576333418, 2576331846 ],\n    \"samples_ts\": [ 198.7, 198.732, 198.732 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:46:07Z\",\n    \"avg_ns\": 12123848814,\n    \"stddev_ns\": 2840998,\n    \"avg_ts\": 42.230815,\n    \"stddev_ts\": 0.009887,\n    \"samples_ns\": [ 12122319850, 12122102151, 12127124442 ],\n    \"samples_ts\": [ 42.2361, 42.2369, 42.2194 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:45:57Z",
+          "avg_ns": 2576471654,
+          "stddev_ns": 240794,
+          "avg_ts": 198.72138,
+          "stddev_ts": 0.018571,
+          "samples_ns": [
+            2576749698,
+            2576333418,
+            2576331846
+          ],
+          "samples_ts": [
+            198.7,
+            198.732,
+            198.732
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:46:07Z",
+          "avg_ns": 12123848814,
+          "stddev_ns": 2840998,
+          "avg_ts": 42.230815,
+          "stddev_ts": 0.009887,
+          "samples_ns": [
+            12122319850,
+            12122102151,
+            12127124442
+          ],
+          "samples_ts": [
+            42.2361,
+            42.2369,
+            42.2194
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 335
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:46:56.205967+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:46:44Z\",\n    \"avg_ns\": 583080444,\n    \"stddev_ns\": 4547406,\n    \"avg_ts\": 219.532680,\n    \"stddev_ts\": 1.719516,\n    \"samples_ns\": [ 577852708, 585269087, 586119539 ],\n    \"samples_ts\": [ 221.51, 218.703, 218.385 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:46:47Z\",\n    \"avg_ns\": 2981780504,\n    \"stddev_ns\": 1390395,\n    \"avg_ts\": 42.927378,\n    \"stddev_ts\": 0.020021,\n    \"samples_ns\": [ 2982911079, 2982202409, 2980228024 ],\n    \"samples_ts\": [ 42.9111, 42.9213, 42.9497 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:46:44Z",
+          "avg_ns": 583080444,
+          "stddev_ns": 4547406,
+          "avg_ts": 219.53268,
+          "stddev_ts": 1.719516,
+          "samples_ns": [
+            577852708,
+            585269087,
+            586119539
+          ],
+          "samples_ts": [
+            221.51,
+            218.703,
+            218.385
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:46:47Z",
+          "avg_ns": 2981780504,
+          "stddev_ns": 1390395,
+          "avg_ts": 42.927378,
+          "stddev_ts": 0.020021,
+          "samples_ns": [
+            2982911079,
+            2982202409,
+            2980228024
+          ],
+          "samples_ts": [
+            42.9111,
+            42.9213,
+            42.9497
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 336
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:47:35.836929+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:46:56Z\",\n    \"avg_ns\": 580141803,\n    \"stddev_ns\": 48716,\n    \"avg_ts\": 220.635713,\n    \"stddev_ts\": 0.013246,\n    \"samples_ns\": [ 580137137, 580109544, 580178730 ],\n    \"samples_ts\": [ 220.637, 220.648, 220.622 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:46:59Z\",\n    \"avg_ns\": 12147107371,\n    \"stddev_ns\": 5532943,\n    \"avg_ts\": 42.149958,\n    \"stddev_ts\": 0.019202,\n    \"samples_ns\": [ 12148367460, 12151901586, 12141053067 ],\n    \"samples_ts\": [ 42.1456, 42.1333, 42.171 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:46:56Z",
+          "avg_ns": 580141803,
+          "stddev_ns": 48716,
+          "avg_ts": 220.635713,
+          "stddev_ts": 0.013246,
+          "samples_ns": [
+            580137137,
+            580109544,
+            580178730
+          ],
+          "samples_ts": [
+            220.637,
+            220.648,
+            220.622
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:46:59Z",
+          "avg_ns": 12147107371,
+          "stddev_ns": 5532943,
+          "avg_ts": 42.149958,
+          "stddev_ts": 0.019202,
+          "samples_ns": [
+            12148367460,
+            12151901586,
+            12141053067
+          ],
+          "samples_ts": [
+            42.1456,
+            42.1333,
+            42.171
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 337
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:47:55.288296+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:47:36Z\",\n    \"avg_ns\": 2400699959,\n    \"stddev_ns\": 764637,\n    \"avg_ts\": 213.271147,\n    \"stddev_ts\": 0.067639,\n    \"samples_ns\": [ 2401560596, 2400425679, 2400113604 ],\n    \"samples_ts\": [ 213.195, 213.296, 213.323 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:47:46Z\",\n    \"avg_ns\": 2979619210,\n    \"stddev_ns\": 729747,\n    \"avg_ts\": 42.958511,\n    \"stddev_ts\": 0.010490,\n    \"samples_ns\": [ 2979214964, 2980459292, 2979183375 ],\n    \"samples_ts\": [ 42.9643, 42.9464, 42.9648 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:47:36Z",
+          "avg_ns": 2400699959,
+          "stddev_ns": 764637,
+          "avg_ts": 213.271147,
+          "stddev_ts": 0.067639,
+          "samples_ns": [
+            2401560596,
+            2400425679,
+            2400113604
+          ],
+          "samples_ts": [
+            213.195,
+            213.296,
+            213.323
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:47:46Z",
+          "avg_ns": 2979619210,
+          "stddev_ns": 729747,
+          "avg_ts": 42.958511,
+          "stddev_ts": 0.01049,
+          "samples_ns": [
+            2979214964,
+            2980459292,
+            2979183375
+          ],
+          "samples_ts": [
+            42.9643,
+            42.9464,
+            42.9648
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 338
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:48:42.266772+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:47:56Z\",\n    \"avg_ns\": 2402519639,\n    \"stddev_ns\": 290163,\n    \"avg_ts\": 213.109602,\n    \"stddev_ts\": 0.024995,\n    \"samples_ns\": [ 2402650321, 2402712333, 2402196265 ],\n    \"samples_ts\": [ 213.098, 213.093, 213.138 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:48:05Z\",\n    \"avg_ns\": 12166249154,\n    \"stddev_ns\": 4253941,\n    \"avg_ts\": 42.083639,\n    \"stddev_ts\": 0.014713,\n    \"samples_ns\": [ 12168911136, 12168491571, 12161344756 ],\n    \"samples_ts\": [ 42.0744, 42.0759, 42.1006 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:47:56Z",
+          "avg_ns": 2402519639,
+          "stddev_ns": 290163,
+          "avg_ts": 213.109602,
+          "stddev_ts": 0.024995,
+          "samples_ns": [
+            2402650321,
+            2402712333,
+            2402196265
+          ],
+          "samples_ts": [
+            213.098,
+            213.093,
+            213.138
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:48:05Z",
+          "avg_ns": 12166249154,
+          "stddev_ns": 4253941,
+          "avg_ts": 42.083639,
+          "stddev_ts": 0.014713,
+          "samples_ns": [
+            12168911136,
+            12168491571,
+            12161344756
+          ],
+          "samples_ts": [
+            42.0744,
+            42.0759,
+            42.1006
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 339
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:48:54.445832+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:48:43Z\",\n    \"avg_ns\": 577707759,\n    \"stddev_ns\": 33137,\n    \"avg_ts\": 221.565312,\n    \"stddev_ts\": 0.008749,\n    \"samples_ns\": [ 577726288, 577714710, 577682280 ],\n    \"samples_ts\": [ 221.558, 221.563, 221.575 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:48:45Z\",\n    \"avg_ns\": 2965957389,\n    \"stddev_ns\": 101519,\n    \"avg_ts\": 43.156385,\n    \"stddev_ts\": 0.001247,\n    \"samples_ns\": [ 2965864878, 2966034002, 2965973288 ],\n    \"samples_ts\": [ 43.1577, 43.1553, 43.1562 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:48:43Z",
+          "avg_ns": 577707759,
+          "stddev_ns": 33137,
+          "avg_ts": 221.565312,
+          "stddev_ts": 0.008749,
+          "samples_ns": [
+            577726288,
+            577714710,
+            577682280
+          ],
+          "samples_ts": [
+            221.558,
+            221.563,
+            221.575
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:48:45Z",
+          "avg_ns": 2965957389,
+          "stddev_ns": 101519,
+          "avg_ts": 43.156385,
+          "stddev_ts": 0.001247,
+          "samples_ns": [
+            2965864878,
+            2966034002,
+            2965973288
+          ],
+          "samples_ts": [
+            43.1577,
+            43.1553,
+            43.1562
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 340
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:49:34.236098+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:48:55Z\",\n    \"avg_ns\": 577513499,\n    \"stddev_ns\": 41543,\n    \"avg_ts\": 221.639842,\n    \"stddev_ts\": 0.015943,\n    \"samples_ns\": [ 577559626, 577479030, 577501841 ],\n    \"samples_ts\": [ 221.622, 221.653, 221.644 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:48:57Z\",\n    \"avg_ns\": 12190494324,\n    \"stddev_ns\": 5004844,\n    \"avg_ts\": 41.999942,\n    \"stddev_ts\": 0.017242,\n    \"samples_ns\": [ 12195815304, 12189786741, 12185880927 ],\n    \"samples_ts\": [ 41.9816, 42.0024, 42.0158 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:48:55Z",
+          "avg_ns": 577513499,
+          "stddev_ns": 41543,
+          "avg_ts": 221.639842,
+          "stddev_ts": 0.015943,
+          "samples_ns": [
+            577559626,
+            577479030,
+            577501841
+          ],
+          "samples_ts": [
+            221.622,
+            221.653,
+            221.644
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:48:57Z",
+          "avg_ns": 12190494324,
+          "stddev_ns": 5004844,
+          "avg_ts": 41.999942,
+          "stddev_ts": 0.017242,
+          "samples_ns": [
+            12195815304,
+            12189786741,
+            12185880927
+          ],
+          "samples_ts": [
+            41.9816,
+            42.0024,
+            42.0158
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 341
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:49:53.904089+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:49:35Z\",\n    \"avg_ns\": 2464874520,\n    \"stddev_ns\": 252243,\n    \"avg_ts\": 207.718486,\n    \"stddev_ts\": 0.021258,\n    \"samples_ns\": [ 2464591914, 2464954773, 2465076873 ],\n    \"samples_ts\": [ 207.742, 207.712, 207.701 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:49:44Z\",\n    \"avg_ns\": 2971598938,\n    \"stddev_ns\": 996314,\n    \"avg_ts\": 43.074457,\n    \"stddev_ts\": 0.014399,\n    \"samples_ns\": [ 2971608642, 2970600795, 2972587379 ],\n    \"samples_ts\": [ 43.0743, 43.0889, 43.0601 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:49:35Z",
+          "avg_ns": 2464874520,
+          "stddev_ns": 252243,
+          "avg_ts": 207.718486,
+          "stddev_ts": 0.021258,
+          "samples_ns": [
+            2464591914,
+            2464954773,
+            2465076873
+          ],
+          "samples_ts": [
+            207.742,
+            207.712,
+            207.701
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:49:44Z",
+          "avg_ns": 2971598938,
+          "stddev_ns": 996314,
+          "avg_ts": 43.074457,
+          "stddev_ts": 0.014399,
+          "samples_ns": [
+            2971608642,
+            2970600795,
+            2972587379
+          ],
+          "samples_ts": [
+            43.0743,
+            43.0889,
+            43.0601
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 342
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:50:41.053483+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:49:54Z\",\n    \"avg_ns\": 2472644884,\n    \"stddev_ns\": 348749,\n    \"avg_ts\": 207.065725,\n    \"stddev_ts\": 0.029204,\n    \"samples_ns\": [ 2472356856, 2473032631, 2472545165 ],\n    \"samples_ts\": [ 207.09, 207.033, 207.074 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:50:04Z\",\n    \"avg_ns\": 12130005935,\n    \"stddev_ns\": 5369899,\n    \"avg_ts\": 42.209383,\n    \"stddev_ts\": 0.018681,\n    \"samples_ns\": [ 12136205866, 12126986346, 12126825593 ],\n    \"samples_ts\": [ 42.1878, 42.2199, 42.2204 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:49:54Z",
+          "avg_ns": 2472644884,
+          "stddev_ns": 348749,
+          "avg_ts": 207.065725,
+          "stddev_ts": 0.029204,
+          "samples_ns": [
+            2472356856,
+            2473032631,
+            2472545165
+          ],
+          "samples_ts": [
+            207.09,
+            207.033,
+            207.074
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:50:04Z",
+          "avg_ns": 12130005935,
+          "stddev_ns": 5369899,
+          "avg_ts": 42.209383,
+          "stddev_ts": 0.018681,
+          "samples_ns": [
+            12136205866,
+            12126986346,
+            12126825593
+          ],
+          "samples_ts": [
+            42.1878,
+            42.2199,
+            42.2204
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 343
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:50:53.219523+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:50:41Z\",\n    \"avg_ns\": 587168640,\n    \"stddev_ns\": 377465,\n    \"avg_ts\": 217.995354,\n    \"stddev_ts\": 0.140191,\n    \"samples_ns\": [ 586734051, 587357130, 587414739 ],\n    \"samples_ts\": [ 218.157, 217.925, 217.904 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:50:44Z\",\n    \"avg_ns\": 2972659207,\n    \"stddev_ns\": 3342974,\n    \"avg_ts\": 43.059126,\n    \"stddev_ts\": 0.048392,\n    \"samples_ns\": [ 2976516418, 2970600512, 2970860691 ],\n    \"samples_ts\": [ 43.0033, 43.0889, 43.0852 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:50:41Z",
+          "avg_ns": 587168640,
+          "stddev_ns": 377465,
+          "avg_ts": 217.995354,
+          "stddev_ts": 0.140191,
+          "samples_ns": [
+            586734051,
+            587357130,
+            587414739
+          ],
+          "samples_ts": [
+            218.157,
+            217.925,
+            217.904
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:50:44Z",
+          "avg_ns": 2972659207,
+          "stddev_ns": 3342974,
+          "avg_ts": 43.059126,
+          "stddev_ts": 0.048392,
+          "samples_ns": [
+            2976516418,
+            2970600512,
+            2970860691
+          ],
+          "samples_ts": [
+            43.0033,
+            43.0889,
+            43.0852
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 344
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:51:32.963873+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:50:53Z\",\n    \"avg_ns\": 579460858,\n    \"stddev_ns\": 211119,\n    \"avg_ts\": 220.895008,\n    \"stddev_ts\": 0.079411,\n    \"samples_ns\": [ 579700550, 579359005, 579323021 ],\n    \"samples_ts\": [ 220.804, 220.934, 220.948 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:50:56Z\",\n    \"avg_ns\": 12186169533,\n    \"stddev_ns\": 1196136,\n    \"avg_ts\": 42.014843,\n    \"stddev_ts\": 0.004106,\n    \"samples_ns\": [ 12186146628, 12184990120, 12187371852 ],\n    \"samples_ts\": [ 42.0149, 42.0189, 42.0107 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:50:53Z",
+          "avg_ns": 579460858,
+          "stddev_ns": 211119,
+          "avg_ts": 220.895008,
+          "stddev_ts": 0.079411,
+          "samples_ns": [
+            579700550,
+            579359005,
+            579323021
+          ],
+          "samples_ts": [
+            220.804,
+            220.934,
+            220.948
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:50:56Z",
+          "avg_ns": 12186169533,
+          "stddev_ns": 1196136,
+          "avg_ts": 42.014843,
+          "stddev_ts": 0.004106,
+          "samples_ns": [
+            12186146628,
+            12184990120,
+            12187371852
+          ],
+          "samples_ts": [
+            42.0149,
+            42.0189,
+            42.0107
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 345
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:51:53.406890+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:51:33Z\",\n    \"avg_ns\": 2652131477,\n    \"stddev_ns\": 10651269,\n    \"avg_ts\": 193.054350,\n    \"stddev_ts\": 0.777083,\n    \"samples_ns\": [ 2658910813, 2657628581, 2639855039 ],\n    \"samples_ts\": [ 192.56, 192.653, 193.95 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:51:44Z\",\n    \"avg_ns\": 2975464065,\n    \"stddev_ns\": 391586,\n    \"avg_ts\": 43.018500,\n    \"stddev_ts\": 0.005661,\n    \"samples_ns\": [ 2975877826, 2975415114, 2975099255 ],\n    \"samples_ts\": [ 43.0125, 43.0192, 43.0238 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:51:33Z",
+          "avg_ns": 2652131477,
+          "stddev_ns": 10651269,
+          "avg_ts": 193.05435,
+          "stddev_ts": 0.777083,
+          "samples_ns": [
+            2658910813,
+            2657628581,
+            2639855039
+          ],
+          "samples_ts": [
+            192.56,
+            192.653,
+            193.95
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:51:44Z",
+          "avg_ns": 2975464065,
+          "stddev_ns": 391586,
+          "avg_ts": 43.0185,
+          "stddev_ts": 0.005661,
+          "samples_ns": [
+            2975877826,
+            2975415114,
+            2975099255
+          ],
+          "samples_ts": [
+            43.0125,
+            43.0192,
+            43.0238
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 346
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:52:41.140124+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:51:54Z\",\n    \"avg_ns\": 2582465447,\n    \"stddev_ns\": 447842,\n    \"avg_ts\": 198.260159,\n    \"stddev_ts\": 0.034159,\n    \"samples_ns\": [ 2582933157, 2582415756, 2582047429 ],\n    \"samples_ts\": [ 198.224, 198.264, 198.292 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:52:04Z\",\n    \"avg_ns\": 12175754199,\n    \"stddev_ns\": 4316891,\n    \"avg_ts\": 42.050787,\n    \"stddev_ts\": 0.014910,\n    \"samples_ns\": [ 12171098723, 12179624810, 12176539064 ],\n    \"samples_ts\": [ 42.0669, 42.0374, 42.0481 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:51:54Z",
+          "avg_ns": 2582465447,
+          "stddev_ns": 447842,
+          "avg_ts": 198.260159,
+          "stddev_ts": 0.034159,
+          "samples_ns": [
+            2582933157,
+            2582415756,
+            2582047429
+          ],
+          "samples_ts": [
+            198.224,
+            198.264,
+            198.292
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:52:04Z",
+          "avg_ns": 12175754199,
+          "stddev_ns": 4316891,
+          "avg_ts": 42.050787,
+          "stddev_ts": 0.01491,
+          "samples_ns": [
+            12171098723,
+            12179624810,
+            12176539064
+          ],
+          "samples_ts": [
+            42.0669,
+            42.0374,
+            42.0481
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 347
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:52:53.367534+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:52:41Z\",\n    \"avg_ns\": 588463794,\n    \"stddev_ns\": 608304,\n    \"avg_ts\": 217.515661,\n    \"stddev_ts\": 0.224778,\n    \"samples_ns\": [ 587931463, 588333094, 589126825 ],\n    \"samples_ts\": [ 217.712, 217.564, 217.271 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:52:44Z\",\n    \"avg_ns\": 2991839726,\n    \"stddev_ns\": 960872,\n    \"avg_ts\": 42.783043,\n    \"stddev_ts\": 0.013695,\n    \"samples_ns\": [ 2990959509, 2991700003, 2992859668 ],\n    \"samples_ts\": [ 42.7956, 42.785, 42.7685 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:52:41Z",
+          "avg_ns": 588463794,
+          "stddev_ns": 608304,
+          "avg_ts": 217.515661,
+          "stddev_ts": 0.224778,
+          "samples_ns": [
+            587931463,
+            588333094,
+            589126825
+          ],
+          "samples_ts": [
+            217.712,
+            217.564,
+            217.271
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:52:44Z",
+          "avg_ns": 2991839726,
+          "stddev_ns": 960872,
+          "avg_ts": 42.783043,
+          "stddev_ts": 0.013695,
+          "samples_ns": [
+            2990959509,
+            2991700003,
+            2992859668
+          ],
+          "samples_ts": [
+            42.7956,
+            42.785,
+            42.7685
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 348
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:53:33.324344+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:52:54Z\",\n    \"avg_ns\": 585636672,\n    \"stddev_ns\": 134952,\n    \"avg_ts\": 218.565555,\n    \"stddev_ts\": 0.048723,\n    \"samples_ns\": [ 585745429, 585491906, 585672683 ],\n    \"samples_ts\": [ 218.525, 218.62, 218.552 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:52:56Z\",\n    \"avg_ns\": 12247989885,\n    \"stddev_ns\": 2213620,\n    \"avg_ts\": 41.802779,\n    \"stddev_ts\": 0.007546,\n    \"samples_ns\": [ 12245583959, 12248453589, 12249932108 ],\n    \"samples_ts\": [ 41.811, 41.8012, 41.7962 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:52:54Z",
+          "avg_ns": 585636672,
+          "stddev_ns": 134952,
+          "avg_ts": 218.565555,
+          "stddev_ts": 0.048723,
+          "samples_ns": [
+            585745429,
+            585491906,
+            585672683
+          ],
+          "samples_ts": [
+            218.525,
+            218.62,
+            218.552
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:52:56Z",
+          "avg_ns": 12247989885,
+          "stddev_ns": 2213620,
+          "avg_ts": 41.802779,
+          "stddev_ts": 0.007546,
+          "samples_ns": [
+            12245583959,
+            12248453589,
+            12249932108
+          ],
+          "samples_ts": [
+            41.811,
+            41.8012,
+            41.7962
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 349
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:53:52.942562+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:53:34Z\",\n    \"avg_ns\": 2436603237,\n    \"stddev_ns\": 86501,\n    \"avg_ts\": 210.128589,\n    \"stddev_ts\": 0.007460,\n    \"samples_ns\": [ 2436519071, 2436598742, 2436691898 ],\n    \"samples_ts\": [ 210.136, 210.129, 210.121 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:53:43Z\",\n    \"avg_ns\": 2990727516,\n    \"stddev_ns\": 1861930,\n    \"avg_ts\": 42.798962,\n    \"stddev_ts\": 0.026626,\n    \"samples_ns\": [ 2989266563, 2992822913, 2990093073 ],\n    \"samples_ts\": [ 42.8199, 42.769, 42.808 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:53:34Z",
+          "avg_ns": 2436603237,
+          "stddev_ns": 86501,
+          "avg_ts": 210.128589,
+          "stddev_ts": 0.00746,
+          "samples_ns": [
+            2436519071,
+            2436598742,
+            2436691898
+          ],
+          "samples_ts": [
+            210.136,
+            210.129,
+            210.121
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:53:43Z",
+          "avg_ns": 2990727516,
+          "stddev_ns": 1861930,
+          "avg_ts": 42.798962,
+          "stddev_ts": 0.026626,
+          "samples_ns": [
+            2989266563,
+            2992822913,
+            2990093073
+          ],
+          "samples_ts": [
+            42.8199,
+            42.769,
+            42.808
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 350
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:54:39.780662+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:53:53Z\",\n    \"avg_ns\": 2401059162,\n    \"stddev_ns\": 312261,\n    \"avg_ts\": 213.239229,\n    \"stddev_ts\": 0.027041,\n    \"samples_ns\": [ 2400736262, 2401341056, 2401100170 ],\n    \"samples_ts\": [ 213.268, 213.214, 213.236 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:54:03Z\",\n    \"avg_ns\": 12121093340,\n    \"stddev_ns\": 4764070,\n    \"avg_ts\": 42.240418,\n    \"stddev_ts\": 0.016590,\n    \"samples_ns\": [ 12119231673, 12126504439, 12117543910 ],\n    \"samples_ts\": [ 42.2469, 42.2216, 42.2528 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:53:53Z",
+          "avg_ns": 2401059162,
+          "stddev_ns": 312261,
+          "avg_ts": 213.239229,
+          "stddev_ts": 0.027041,
+          "samples_ns": [
+            2400736262,
+            2401341056,
+            2401100170
+          ],
+          "samples_ts": [
+            213.268,
+            213.214,
+            213.236
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:54:03Z",
+          "avg_ns": 12121093340,
+          "stddev_ns": 4764070,
+          "avg_ts": 42.240418,
+          "stddev_ts": 0.01659,
+          "samples_ns": [
+            12119231673,
+            12126504439,
+            12117543910
+          ],
+          "samples_ts": [
+            42.2469,
+            42.2216,
+            42.2528
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 351
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:54:51.840351+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:54:40Z\",\n    \"avg_ns\": 582611159,\n    \"stddev_ns\": 4240441,\n    \"avg_ts\": 219.708347,\n    \"stddev_ts\": 1.604136,\n    \"samples_ns\": [ 586013733, 583959169, 577860575 ],\n    \"samples_ts\": [ 218.425, 219.193, 221.507 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:54:42Z\",\n    \"avg_ns\": 2943557965,\n    \"stddev_ns\": 401991,\n    \"avg_ts\": 43.484791,\n    \"stddev_ts\": 0.005830,\n    \"samples_ns\": [ 2943792663, 2943778845, 2943102389 ],\n    \"samples_ts\": [ 43.4813, 43.4815, 43.4915 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:54:40Z",
+          "avg_ns": 582611159,
+          "stddev_ns": 4240441,
+          "avg_ts": 219.708347,
+          "stddev_ts": 1.604136,
+          "samples_ns": [
+            586013733,
+            583959169,
+            577860575
+          ],
+          "samples_ts": [
+            218.425,
+            219.193,
+            221.507
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:54:42Z",
+          "avg_ns": 2943557965,
+          "stddev_ns": 401991,
+          "avg_ts": 43.484791,
+          "stddev_ts": 0.00583,
+          "samples_ns": [
+            2943792663,
+            2943778845,
+            2943102389
+          ],
+          "samples_ts": [
+            43.4813,
+            43.4815,
+            43.4915
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 352
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:55:31.274743+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:54:52Z\",\n    \"avg_ns\": 575865643,\n    \"stddev_ns\": 166882,\n    \"avg_ts\": 222.274082,\n    \"stddev_ts\": 0.064421,\n    \"samples_ns\": [ 575679371, 576001521, 575916037 ],\n    \"samples_ts\": [ 222.346, 222.222, 222.255 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:54:54Z\",\n    \"avg_ns\": 12082437655,\n    \"stddev_ns\": 8883014,\n    \"avg_ts\": 42.375570,\n    \"stddev_ts\": 0.031141,\n    \"samples_ns\": [ 12092289940, 12075045110, 12079977917 ],\n    \"samples_ts\": [ 42.341, 42.4015, 42.3842 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:54:52Z",
+          "avg_ns": 575865643,
+          "stddev_ns": 166882,
+          "avg_ts": 222.274082,
+          "stddev_ts": 0.064421,
+          "samples_ns": [
+            575679371,
+            576001521,
+            575916037
+          ],
+          "samples_ts": [
+            222.346,
+            222.222,
+            222.255
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:54:54Z",
+          "avg_ns": 12082437655,
+          "stddev_ns": 8883014,
+          "avg_ts": 42.37557,
+          "stddev_ts": 0.031141,
+          "samples_ns": [
+            12092289940,
+            12075045110,
+            12079977917
+          ],
+          "samples_ts": [
+            42.341,
+            42.4015,
+            42.3842
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 353
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:55:50.849343+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:55:32Z\",\n    \"avg_ns\": 2432070269,\n    \"stddev_ns\": 726006,\n    \"avg_ts\": 210.520245,\n    \"stddev_ts\": 0.062544,\n    \"samples_ns\": [ 2431817101, 2432885440, 2431508268 ],\n    \"samples_ts\": [ 210.542, 210.45, 210.569 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:55:41Z\",\n    \"avg_ns\": 2971820617,\n    \"stddev_ns\": 521251,\n    \"avg_ts\": 43.071241,\n    \"stddev_ts\": 0.007555,\n    \"samples_ns\": [ 2971271481, 2971881784, 2972308586 ],\n    \"samples_ts\": [ 43.0792, 43.0704, 43.0642 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:55:32Z",
+          "avg_ns": 2432070269,
+          "stddev_ns": 726006,
+          "avg_ts": 210.520245,
+          "stddev_ts": 0.062544,
+          "samples_ns": [
+            2431817101,
+            2432885440,
+            2431508268
+          ],
+          "samples_ts": [
+            210.542,
+            210.45,
+            210.569
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:55:41Z",
+          "avg_ns": 2971820617,
+          "stddev_ns": 521251,
+          "avg_ts": 43.071241,
+          "stddev_ts": 0.007555,
+          "samples_ns": [
+            2971271481,
+            2971881784,
+            2972308586
+          ],
+          "samples_ts": [
+            43.0792,
+            43.0704,
+            43.0642
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 354
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:56:37.579988+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:55:51Z\",\n    \"avg_ns\": 2400555666,\n    \"stddev_ns\": 400988,\n    \"avg_ts\": 213.283956,\n    \"stddev_ts\": 0.035088,\n    \"samples_ns\": [ 2400355675, 2400300707, 2401010618 ],\n    \"samples_ts\": [ 213.302, 213.307, 213.244 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:56:01Z\",\n    \"avg_ns\": 12088436931,\n    \"stddev_ns\": 13638698,\n    \"avg_ts\": 42.354561,\n    \"stddev_ts\": 0.047813,\n    \"samples_ns\": [ 12072783338, 12097751575, 12094775882 ],\n    \"samples_ts\": [ 42.4094, 42.3219, 42.3323 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:55:51Z",
+          "avg_ns": 2400555666,
+          "stddev_ns": 400988,
+          "avg_ts": 213.283956,
+          "stddev_ts": 0.035088,
+          "samples_ns": [
+            2400355675,
+            2400300707,
+            2401010618
+          ],
+          "samples_ts": [
+            213.302,
+            213.307,
+            213.244
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:56:01Z",
+          "avg_ns": 12088436931,
+          "stddev_ns": 13638698,
+          "avg_ts": 42.354561,
+          "stddev_ts": 0.047813,
+          "samples_ns": [
+            12072783338,
+            12097751575,
+            12094775882
+          ],
+          "samples_ts": [
+            42.4094,
+            42.3219,
+            42.3323
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 355
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:56:49.618779+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:56:38Z\",\n    \"avg_ns\": 575688429,\n    \"stddev_ns\": 161309,\n    \"avg_ts\": 222.342503,\n    \"stddev_ts\": 0.060910,\n    \"samples_ns\": [ 575834377, 575709763, 575521149 ],\n    \"samples_ts\": [ 222.286, 222.334, 222.407 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:56:40Z\",\n    \"avg_ns\": 2949053136,\n    \"stddev_ns\": 1194643,\n    \"avg_ts\": 43.403767,\n    \"stddev_ts\": 0.017542,\n    \"samples_ns\": [ 2948538058, 2950416251, 2948205101 ],\n    \"samples_ts\": [ 43.4113, 43.3837, 43.4162 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:56:38Z",
+          "avg_ns": 575688429,
+          "stddev_ns": 161309,
+          "avg_ts": 222.342503,
+          "stddev_ts": 0.06091,
+          "samples_ns": [
+            575834377,
+            575709763,
+            575521149
+          ],
+          "samples_ts": [
+            222.286,
+            222.334,
+            222.407
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:56:40Z",
+          "avg_ns": 2949053136,
+          "stddev_ns": 1194643,
+          "avg_ts": 43.403767,
+          "stddev_ts": 0.017542,
+          "samples_ns": [
+            2948538058,
+            2950416251,
+            2948205101
+          ],
+          "samples_ts": [
+            43.4113,
+            43.3837,
+            43.4162
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 356
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:57:29.273530+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:56:50Z\",\n    \"avg_ns\": 584592662,\n    \"stddev_ns\": 508801,\n    \"avg_ts\": 218.955989,\n    \"stddev_ts\": 0.190260,\n    \"samples_ns\": [ 585178553, 584270671, 584328763 ],\n    \"samples_ts\": [ 218.737, 219.077, 219.055 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:56:52Z\",\n    \"avg_ns\": 12151118600,\n    \"stddev_ns\": 48362591,\n    \"avg_ts\": 42.136483,\n    \"stddev_ts\": 0.167610,\n    \"samples_ns\": [ 12201675492, 12146380762, 12105299548 ],\n    \"samples_ts\": [ 41.9615, 42.1525, 42.2955 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:56:50Z",
+          "avg_ns": 584592662,
+          "stddev_ns": 508801,
+          "avg_ts": 218.955989,
+          "stddev_ts": 0.19026,
+          "samples_ns": [
+            585178553,
+            584270671,
+            584328763
+          ],
+          "samples_ts": [
+            218.737,
+            219.077,
+            219.055
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:56:52Z",
+          "avg_ns": 12151118600,
+          "stddev_ns": 48362591,
+          "avg_ts": 42.136483,
+          "stddev_ts": 0.16761,
+          "samples_ns": [
+            12201675492,
+            12146380762,
+            12105299548
+          ],
+          "samples_ts": [
+            41.9615,
+            42.1525,
+            42.2955
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 357
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:57:49.295625+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:57:30Z\",\n    \"avg_ns\": 2553113004,\n    \"stddev_ns\": 711591,\n    \"avg_ts\": 200.539508,\n    \"stddev_ts\": 0.055615,\n    \"samples_ns\": [ 2553248263, 2552347138, 2553743613 ],\n    \"samples_ts\": [ 200.529, 200.6, 200.49 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:57:40Z\",\n    \"avg_ns\": 2957901689,\n    \"stddev_ns\": 827610,\n    \"avg_ts\": 43.273922,\n    \"stddev_ts\": 0.012108,\n    \"samples_ns\": [ 2957065628, 2958720581, 2957918858 ],\n    \"samples_ts\": [ 43.2862, 43.2619, 43.2737 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:57:30Z",
+          "avg_ns": 2553113004,
+          "stddev_ns": 711591,
+          "avg_ts": 200.539508,
+          "stddev_ts": 0.055615,
+          "samples_ns": [
+            2553248263,
+            2552347138,
+            2553743613
+          ],
+          "samples_ts": [
+            200.529,
+            200.6,
+            200.49
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:57:40Z",
+          "avg_ns": 2957901689,
+          "stddev_ns": 827610,
+          "avg_ts": 43.273922,
+          "stddev_ts": 0.012108,
+          "samples_ns": [
+            2957065628,
+            2958720581,
+            2957918858
+          ],
+          "samples_ts": [
+            43.2862,
+            43.2619,
+            43.2737
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 358
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:58:36.575707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:57:50Z\",\n    \"avg_ns\": 2530507997,\n    \"stddev_ns\": 66831,\n    \"avg_ts\": 202.330916,\n    \"stddev_ts\": 0.003518,\n    \"samples_ns\": [ 2530463907, 2530551905, 2530508180 ],\n    \"samples_ts\": [ 202.334, 202.327, 202.331 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:58:00Z\",\n    \"avg_ns\": 12096273509,\n    \"stddev_ns\": 11040809,\n    \"avg_ts\": 42.327109,\n    \"stddev_ts\": 0.038648,\n    \"samples_ns\": [ 12083864779, 12099947079, 12105008670 ],\n    \"samples_ts\": [ 42.3706, 42.3142, 42.2965 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:57:50Z",
+          "avg_ns": 2530507997,
+          "stddev_ns": 66831,
+          "avg_ts": 202.330916,
+          "stddev_ts": 0.003518,
+          "samples_ns": [
+            2530463907,
+            2530551905,
+            2530508180
+          ],
+          "samples_ts": [
+            202.334,
+            202.327,
+            202.331
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:58:00Z",
+          "avg_ns": 12096273509,
+          "stddev_ns": 11040809,
+          "avg_ts": 42.327109,
+          "stddev_ts": 0.038648,
+          "samples_ns": [
+            12083864779,
+            12099947079,
+            12105008670
+          ],
+          "samples_ts": [
+            42.3706,
+            42.3142,
+            42.2965
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 359
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:58:48.480387+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:58:37Z\",\n    \"avg_ns\": 405733229,\n    \"stddev_ns\": 283011,\n    \"avg_ts\": 315.478330,\n    \"stddev_ts\": 0.219971,\n    \"samples_ns\": [ 406058443, 405598418, 405542826 ],\n    \"samples_ts\": [ 315.226, 315.583, 315.626 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:58:38Z\",\n    \"avg_ns\": 3130023348,\n    \"stddev_ns\": 187050,\n    \"avg_ts\": 40.894264,\n    \"stddev_ts\": 0.002444,\n    \"samples_ns\": [ 3130208472, 3130027144, 3129834428 ],\n    \"samples_ts\": [ 40.8918, 40.8942, 40.8967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:58:37Z",
+          "avg_ns": 405733229,
+          "stddev_ns": 283011,
+          "avg_ts": 315.47833,
+          "stddev_ts": 0.219971,
+          "samples_ns": [
+            406058443,
+            405598418,
+            405542826
+          ],
+          "samples_ts": [
+            315.226,
+            315.583,
+            315.626
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:58:38Z",
+          "avg_ns": 3130023348,
+          "stddev_ns": 187050,
+          "avg_ts": 40.894264,
+          "stddev_ts": 0.002444,
+          "samples_ns": [
+            3130208472,
+            3130027144,
+            3129834428
+          ],
+          "samples_ts": [
+            40.8918,
+            40.8942,
+            40.8967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 360
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:59:29.118019+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:58:49Z\",\n    \"avg_ns\": 406496025,\n    \"stddev_ns\": 56706,\n    \"avg_ts\": 314.886231,\n    \"stddev_ts\": 0.037968,\n    \"samples_ns\": [ 406475600, 406460525, 406551952 ],\n    \"samples_ts\": [ 314.902, 314.914, 314.843 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:58:50Z\",\n    \"avg_ns\": 12715073933,\n    \"stddev_ns\": 4613713,\n    \"avg_ts\": 40.267170,\n    \"stddev_ts\": 0.014604,\n    \"samples_ns\": [ 12720275755, 12713462683, 12711483362 ],\n    \"samples_ts\": [ 40.2507, 40.2723, 40.2785 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:58:49Z",
+          "avg_ns": 406496025,
+          "stddev_ns": 56706,
+          "avg_ts": 314.886231,
+          "stddev_ts": 0.037968,
+          "samples_ns": [
+            406475600,
+            406460525,
+            406551952
+          ],
+          "samples_ts": [
+            314.902,
+            314.914,
+            314.843
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:58:50Z",
+          "avg_ns": 12715073933,
+          "stddev_ns": 4613713,
+          "avg_ts": 40.26717,
+          "stddev_ts": 0.014604,
+          "samples_ns": [
+            12720275755,
+            12713462683,
+            12711483362
+          ],
+          "samples_ts": [
+            40.2507,
+            40.2723,
+            40.2785
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 361
+    },
+    {
+      "timestamp_utc": "2025-12-08T23:59:46.054556+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:59:29Z\",\n    \"avg_ns\": 1661679100,\n    \"stddev_ns\": 387612,\n    \"avg_ts\": 308.122079,\n    \"stddev_ts\": 0.071868,\n    \"samples_ns\": [ 1662110316, 1661359652, 1661567332 ],\n    \"samples_ts\": [ 308.042, 308.181, 308.143 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:59:36Z\",\n    \"avg_ns\": 3132720895,\n    \"stddev_ns\": 1975793,\n    \"avg_ts\": 40.859061,\n    \"stddev_ts\": 0.025779,\n    \"samples_ns\": [ 3130440674, 3133796166, 3133925845 ],\n    \"samples_ts\": [ 40.8888, 40.845, 40.8433 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:59:29Z",
+          "avg_ns": 1661679100,
+          "stddev_ns": 387612,
+          "avg_ts": 308.122079,
+          "stddev_ts": 0.071868,
+          "samples_ns": [
+            1662110316,
+            1661359652,
+            1661567332
+          ],
+          "samples_ts": [
+            308.042,
+            308.181,
+            308.143
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:59:36Z",
+          "avg_ns": 3132720895,
+          "stddev_ns": 1975793,
+          "avg_ts": 40.859061,
+          "stddev_ts": 0.025779,
+          "samples_ns": [
+            3130440674,
+            3133796166,
+            3133925845
+          ],
+          "samples_ts": [
+            40.8888,
+            40.845,
+            40.8433
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 362
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:00:31.907970+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:59:46Z\",\n    \"avg_ns\": 1673298013,\n    \"stddev_ns\": 146154,\n    \"avg_ts\": 305.982556,\n    \"stddev_ts\": 0.025659,\n    \"samples_ns\": [ 1673325774, 1673145895, 1673422371 ],\n    \"samples_ts\": [ 305.977, 306.01, 305.96 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-08T23:59:53Z\",\n    \"avg_ns\": 12765020342,\n    \"stddev_ns\": 56769576,\n    \"avg_ts\": 40.110140,\n    \"stddev_ts\": 0.178586,\n    \"samples_ns\": [ 12703893106, 12816086832, 12775081089 ],\n    \"samples_ts\": [ 40.3026, 39.9498, 40.078 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:59:46Z",
+          "avg_ns": 1673298013,
+          "stddev_ns": 146154,
+          "avg_ts": 305.982556,
+          "stddev_ts": 0.025659,
+          "samples_ns": [
+            1673325774,
+            1673145895,
+            1673422371
+          ],
+          "samples_ts": [
+            305.977,
+            306.01,
+            305.96
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-08T23:59:53Z",
+          "avg_ns": 12765020342,
+          "stddev_ns": 56769576,
+          "avg_ts": 40.11014,
+          "stddev_ts": 0.178586,
+          "samples_ns": [
+            12703893106,
+            12816086832,
+            12775081089
+          ],
+          "samples_ts": [
+            40.3026,
+            39.9498,
+            40.078
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 363
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:00:43.822277+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:00:32Z\",\n    \"avg_ns\": 408239897,\n    \"stddev_ns\": 294696,\n    \"avg_ts\": 313.541242,\n    \"stddev_ts\": 0.226394,\n    \"samples_ns\": [ 408490900, 407915412, 408313379 ],\n    \"samples_ts\": [ 313.348, 313.791, 313.485 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:00:34Z\",\n    \"avg_ns\": 3128078430,\n    \"stddev_ns\": 268103,\n    \"avg_ts\": 40.919690,\n    \"stddev_ts\": 0.003430,\n    \"samples_ns\": [ 3127891155, 3127966040, 3128378096 ],\n    \"samples_ts\": [ 40.9221, 40.9212, 40.9158 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:00:32Z",
+          "avg_ns": 408239897,
+          "stddev_ns": 294696,
+          "avg_ts": 313.541242,
+          "stddev_ts": 0.226394,
+          "samples_ns": [
+            408490900,
+            407915412,
+            408313379
+          ],
+          "samples_ts": [
+            313.348,
+            313.791,
+            313.485
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:00:34Z",
+          "avg_ns": 3128078430,
+          "stddev_ns": 268103,
+          "avg_ts": 40.91969,
+          "stddev_ts": 0.00343,
+          "samples_ns": [
+            3127891155,
+            3127966040,
+            3128378096
+          ],
+          "samples_ts": [
+            40.9221,
+            40.9212,
+            40.9158
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 364
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:01:25.200392+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:00:44Z\",\n    \"avg_ns\": 407090620,\n    \"stddev_ns\": 278175,\n    \"avg_ts\": 314.426404,\n    \"stddev_ts\": 0.214877,\n    \"samples_ns\": [ 407354506, 406800074, 407117280 ],\n    \"samples_ts\": [ 314.223, 314.651, 314.406 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:00:46Z\",\n    \"avg_ns\": 12945815214,\n    \"stddev_ns\": 7339649,\n    \"avg_ts\": 39.549468,\n    \"stddev_ts\": 0.022429,\n    \"samples_ns\": [ 12937476369, 12951295437, 12948673836 ],\n    \"samples_ts\": [ 39.575, 39.5327, 39.5407 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:00:44Z",
+          "avg_ns": 407090620,
+          "stddev_ns": 278175,
+          "avg_ts": 314.426404,
+          "stddev_ts": 0.214877,
+          "samples_ns": [
+            407354506,
+            406800074,
+            407117280
+          ],
+          "samples_ts": [
+            314.223,
+            314.651,
+            314.406
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:00:46Z",
+          "avg_ns": 12945815214,
+          "stddev_ns": 7339649,
+          "avg_ts": 39.549468,
+          "stddev_ts": 0.022429,
+          "samples_ns": [
+            12937476369,
+            12951295437,
+            12948673836
+          ],
+          "samples_ts": [
+            39.575,
+            39.5327,
+            39.5407
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 365
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:01:42.255098+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:01:25Z\",\n    \"avg_ns\": 1696275893,\n    \"stddev_ns\": 1212548,\n    \"avg_ts\": 301.837795,\n    \"stddev_ts\": 0.215839,\n    \"samples_ns\": [ 1696745620, 1697183304, 1694898755 ],\n    \"samples_ts\": [ 301.754, 301.676, 302.083 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:01:32Z\",\n    \"avg_ns\": 3126014402,\n    \"stddev_ns\": 1625068,\n    \"avg_ts\": 40.946716,\n    \"stddev_ts\": 0.021280,\n    \"samples_ns\": [ 3127032977, 3124141443, 3126868787 ],\n    \"samples_ts\": [ 40.9334, 40.9713, 40.9355 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:01:25Z",
+          "avg_ns": 1696275893,
+          "stddev_ns": 1212548,
+          "avg_ts": 301.837795,
+          "stddev_ts": 0.215839,
+          "samples_ns": [
+            1696745620,
+            1697183304,
+            1694898755
+          ],
+          "samples_ts": [
+            301.754,
+            301.676,
+            302.083
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:01:32Z",
+          "avg_ns": 3126014402,
+          "stddev_ns": 1625068,
+          "avg_ts": 40.946716,
+          "stddev_ts": 0.02128,
+          "samples_ns": [
+            3127032977,
+            3124141443,
+            3126868787
+          ],
+          "samples_ts": [
+            40.9334,
+            40.9713,
+            40.9355
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 366
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:02:28.501417+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:01:43Z\",\n    \"avg_ns\": 1734076496,\n    \"stddev_ns\": 918426,\n    \"avg_ts\": 295.258079,\n    \"stddev_ts\": 0.156022,\n    \"samples_ns\": [ 1735102699, 1733787494, 1733339297 ],\n    \"samples_ts\": [ 295.083, 295.307, 295.384 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:01:49Z\",\n    \"avg_ns\": 12814067225,\n    \"stddev_ns\": 6095156,\n    \"avg_ts\": 39.956094,\n    \"stddev_ts\": 0.019002,\n    \"samples_ns\": [ 12807418442, 12819384244, 12815398991 ],\n    \"samples_ts\": [ 39.9768, 39.9395, 39.9519 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:01:43Z",
+          "avg_ns": 1734076496,
+          "stddev_ns": 918426,
+          "avg_ts": 295.258079,
+          "stddev_ts": 0.156022,
+          "samples_ns": [
+            1735102699,
+            1733787494,
+            1733339297
+          ],
+          "samples_ts": [
+            295.083,
+            295.307,
+            295.384
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:01:49Z",
+          "avg_ns": 12814067225,
+          "stddev_ns": 6095156,
+          "avg_ts": 39.956094,
+          "stddev_ts": 0.019002,
+          "samples_ns": [
+            12807418442,
+            12819384244,
+            12815398991
+          ],
+          "samples_ts": [
+            39.9768,
+            39.9395,
+            39.9519
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 367
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:02:40.435614+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:02:29Z\",\n    \"avg_ns\": 406507184,\n    \"stddev_ns\": 168855,\n    \"avg_ts\": 314.877619,\n    \"stddev_ts\": 0.129836,\n    \"samples_ns\": [ 406694983, 406372609, 406453961 ],\n    \"samples_ts\": [ 314.732, 314.982, 314.919 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:02:30Z\",\n    \"avg_ns\": 3136658909,\n    \"stddev_ns\": 4899331,\n    \"avg_ts\": 40.807819,\n    \"stddev_ts\": 0.063680,\n    \"samples_ns\": [ 3133528351, 3142304631, 3134143746 ],\n    \"samples_ts\": [ 40.8485, 40.7344, 40.8405 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:02:29Z",
+          "avg_ns": 406507184,
+          "stddev_ns": 168855,
+          "avg_ts": 314.877619,
+          "stddev_ts": 0.129836,
+          "samples_ns": [
+            406694983,
+            406372609,
+            406453961
+          ],
+          "samples_ts": [
+            314.732,
+            314.982,
+            314.919
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:02:30Z",
+          "avg_ns": 3136658909,
+          "stddev_ns": 4899331,
+          "avg_ts": 40.807819,
+          "stddev_ts": 0.06368,
+          "samples_ns": [
+            3133528351,
+            3142304631,
+            3134143746
+          ],
+          "samples_ts": [
+            40.8485,
+            40.7344,
+            40.8405
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 368
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:03:21.348175+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:02:41Z\",\n    \"avg_ns\": 408415165,\n    \"stddev_ns\": 76220,\n    \"avg_ts\": 313.406586,\n    \"stddev_ts\": 0.056399,\n    \"samples_ns\": [ 408480271, 408429752, 408335473 ],\n    \"samples_ts\": [ 313.357, 313.395, 313.468 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:02:42Z\",\n    \"avg_ns\": 12787668091,\n    \"stddev_ns\": 11106882,\n    \"avg_ts\": 40.038595,\n    \"stddev_ts\": 0.034773,\n    \"samples_ns\": [ 12776475806, 12798685196, 12787843273 ],\n    \"samples_ts\": [ 40.0736, 40.0041, 40.038 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:02:41Z",
+          "avg_ns": 408415165,
+          "stddev_ns": 76220,
+          "avg_ts": 313.406586,
+          "stddev_ts": 0.056399,
+          "samples_ns": [
+            408480271,
+            408429752,
+            408335473
+          ],
+          "samples_ts": [
+            313.357,
+            313.395,
+            313.468
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:02:42Z",
+          "avg_ns": 12787668091,
+          "stddev_ns": 11106882,
+          "avg_ts": 40.038595,
+          "stddev_ts": 0.034773,
+          "samples_ns": [
+            12776475806,
+            12798685196,
+            12787843273
+          ],
+          "samples_ts": [
+            40.0736,
+            40.0041,
+            40.038
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 369
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:03:39.218792+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:03:22Z\",\n    \"avg_ns\": 1876157217,\n    \"stddev_ns\": 431244,\n    \"avg_ts\": 272.898248,\n    \"stddev_ts\": 0.062095,\n    \"samples_ns\": [ 1876538222, 1875695891, 1876237540 ],\n    \"samples_ts\": [ 272.843, 272.965, 272.887 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:03:29Z\",\n    \"avg_ns\": 3156865922,\n    \"stddev_ns\": 996438,\n    \"avg_ts\": 40.546546,\n    \"stddev_ts\": 0.012755,\n    \"samples_ns\": [ 3156254544, 3156331232, 3158011992 ],\n    \"samples_ts\": [ 40.5544, 40.5534, 40.5318 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:03:22Z",
+          "avg_ns": 1876157217,
+          "stddev_ns": 431244,
+          "avg_ts": 272.898248,
+          "stddev_ts": 0.062095,
+          "samples_ns": [
+            1876538222,
+            1875695891,
+            1876237540
+          ],
+          "samples_ts": [
+            272.843,
+            272.965,
+            272.887
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:03:29Z",
+          "avg_ns": 3156865922,
+          "stddev_ns": 996438,
+          "avg_ts": 40.546546,
+          "stddev_ts": 0.012755,
+          "samples_ns": [
+            3156254544,
+            3156331232,
+            3158011992
+          ],
+          "samples_ts": [
+            40.5544,
+            40.5534,
+            40.5318
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 370
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:04:26.280465+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:03:39Z\",\n    \"avg_ns\": 1881251407,\n    \"stddev_ns\": 1220823,\n    \"avg_ts\": 272.159341,\n    \"stddev_ts\": 0.176328,\n    \"samples_ns\": [ 1880644080, 1882655077, 1880455066 ],\n    \"samples_ts\": [ 272.247, 271.956, 272.275 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:03:47Z\",\n    \"avg_ns\": 12888183751,\n    \"stddev_ns\": 1138983,\n    \"avg_ts\": 39.726312,\n    \"stddev_ts\": 0.003511,\n    \"samples_ns\": [ 12887814928, 12889461442, 12887274883 ],\n    \"samples_ts\": [ 39.7274, 39.7224, 39.7291 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:03:39Z",
+          "avg_ns": 1881251407,
+          "stddev_ns": 1220823,
+          "avg_ts": 272.159341,
+          "stddev_ts": 0.176328,
+          "samples_ns": [
+            1880644080,
+            1882655077,
+            1880455066
+          ],
+          "samples_ts": [
+            272.247,
+            271.956,
+            272.275
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:03:47Z",
+          "avg_ns": 12888183751,
+          "stddev_ns": 1138983,
+          "avg_ts": 39.726312,
+          "stddev_ts": 0.003511,
+          "samples_ns": [
+            12887814928,
+            12889461442,
+            12887274883
+          ],
+          "samples_ts": [
+            39.7274,
+            39.7224,
+            39.7291
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 371
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:04:38.310903+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:04:27Z\",\n    \"avg_ns\": 407179240,\n    \"stddev_ns\": 261019,\n    \"avg_ts\": 314.357960,\n    \"stddev_ts\": 0.201451,\n    \"samples_ns\": [ 406988387, 407072646, 407476687 ],\n    \"samples_ts\": [ 314.505, 314.44, 314.128 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:04:28Z\",\n    \"avg_ns\": 3163493624,\n    \"stddev_ns\": 1567511,\n    \"avg_ts\": 40.461602,\n    \"stddev_ts\": 0.020028,\n    \"samples_ns\": [ 3164209407, 3161698198, 3164573269 ],\n    \"samples_ts\": [ 40.4524, 40.4846, 40.4478 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:04:27Z",
+          "avg_ns": 407179240,
+          "stddev_ns": 261019,
+          "avg_ts": 314.35796,
+          "stddev_ts": 0.201451,
+          "samples_ns": [
+            406988387,
+            407072646,
+            407476687
+          ],
+          "samples_ts": [
+            314.505,
+            314.44,
+            314.128
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:04:28Z",
+          "avg_ns": 3163493624,
+          "stddev_ns": 1567511,
+          "avg_ts": 40.461602,
+          "stddev_ts": 0.020028,
+          "samples_ns": [
+            3164209407,
+            3161698198,
+            3164573269
+          ],
+          "samples_ts": [
+            40.4524,
+            40.4846,
+            40.4478
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 372
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:05:18.917380+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:04:39Z\",\n    \"avg_ns\": 402848669,\n    \"stddev_ns\": 274344,\n    \"avg_ts\": 317.737277,\n    \"stddev_ts\": 0.215868,\n    \"samples_ns\": [ 402541222, 403065394, 402939392 ],\n    \"samples_ts\": [ 317.98, 317.566, 317.666 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:04:40Z\",\n    \"avg_ns\": 12710945433,\n    \"stddev_ns\": 8411317,\n    \"avg_ts\": 40.280257,\n    \"stddev_ts\": 0.026650,\n    \"samples_ns\": [ 12710902792, 12702557029, 12719376480 ],\n    \"samples_ts\": [ 40.2804, 40.3068, 40.2535 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:04:39Z",
+          "avg_ns": 402848669,
+          "stddev_ns": 274344,
+          "avg_ts": 317.737277,
+          "stddev_ts": 0.215868,
+          "samples_ns": [
+            402541222,
+            403065394,
+            402939392
+          ],
+          "samples_ts": [
+            317.98,
+            317.566,
+            317.666
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:04:40Z",
+          "avg_ns": 12710945433,
+          "stddev_ns": 8411317,
+          "avg_ts": 40.280257,
+          "stddev_ts": 0.02665,
+          "samples_ns": [
+            12710902792,
+            12702557029,
+            12719376480
+          ],
+          "samples_ts": [
+            40.2804,
+            40.3068,
+            40.2535
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 373
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:05:36.090357+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:05:19Z\",\n    \"avg_ns\": 1689626188,\n    \"stddev_ns\": 551486,\n    \"avg_ts\": 303.025628,\n    \"stddev_ts\": 0.098643,\n    \"samples_ns\": [ 1690088092, 1689017836, 1689772637 ],\n    \"samples_ts\": [ 302.943, 303.135, 302.999 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:05:26Z\",\n    \"avg_ns\": 3171969573,\n    \"stddev_ns\": 1013621,\n    \"avg_ts\": 40.353479,\n    \"stddev_ts\": 0.012856,\n    \"samples_ns\": [ 3172893220, 3172125199, 3170890302 ],\n    \"samples_ts\": [ 40.3417, 40.3515, 40.3672 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:05:19Z",
+          "avg_ns": 1689626188,
+          "stddev_ns": 551486,
+          "avg_ts": 303.025628,
+          "stddev_ts": 0.098643,
+          "samples_ns": [
+            1690088092,
+            1689017836,
+            1689772637
+          ],
+          "samples_ts": [
+            302.943,
+            303.135,
+            302.999
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:05:26Z",
+          "avg_ns": 3171969573,
+          "stddev_ns": 1013621,
+          "avg_ts": 40.353479,
+          "stddev_ts": 0.012856,
+          "samples_ns": [
+            3172893220,
+            3172125199,
+            3170890302
+          ],
+          "samples_ts": [
+            40.3417,
+            40.3515,
+            40.3672
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 374
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:06:21.640266+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:05:36Z\",\n    \"avg_ns\": 1661510713,\n    \"stddev_ns\": 722856,\n    \"avg_ts\": 308.153333,\n    \"stddev_ts\": 0.133832,\n    \"samples_ns\": [ 1661333456, 1660894151, 1662304533 ],\n    \"samples_ts\": [ 308.186, 308.268, 308.006 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:05:43Z\",\n    \"avg_ns\": 12678265277,\n    \"stddev_ns\": 2200020,\n    \"avg_ts\": 40.384075,\n    \"stddev_ts\": 0.007007,\n    \"samples_ns\": [ 12677051516, 12676939496, 12680804819 ],\n    \"samples_ts\": [ 40.3879, 40.3883, 40.376 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:05:36Z",
+          "avg_ns": 1661510713,
+          "stddev_ns": 722856,
+          "avg_ts": 308.153333,
+          "stddev_ts": 0.133832,
+          "samples_ns": [
+            1661333456,
+            1660894151,
+            1662304533
+          ],
+          "samples_ts": [
+            308.186,
+            308.268,
+            308.006
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:05:43Z",
+          "avg_ns": 12678265277,
+          "stddev_ns": 2200020,
+          "avg_ts": 40.384075,
+          "stddev_ts": 0.007007,
+          "samples_ns": [
+            12677051516,
+            12676939496,
+            12680804819
+          ],
+          "samples_ts": [
+            40.3879,
+            40.3883,
+            40.376
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 375
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:06:33.542328+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:06:22Z\",\n    \"avg_ns\": 406090110,\n    \"stddev_ns\": 404060,\n    \"avg_ts\": 315.201186,\n    \"stddev_ts\": 0.313627,\n    \"samples_ns\": [ 406493893, 405685772, 406090665 ],\n    \"samples_ts\": [ 314.888, 315.515, 315.201 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:06:24Z\",\n    \"avg_ns\": 3127886995,\n    \"stddev_ns\": 679169,\n    \"avg_ts\": 40.922196,\n    \"stddev_ts\": 0.008825,\n    \"samples_ns\": [ 3127243550, 3128588849, 3127828588 ],\n    \"samples_ts\": [ 40.9306, 40.913, 40.923 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:06:22Z",
+          "avg_ns": 406090110,
+          "stddev_ns": 404060,
+          "avg_ts": 315.201186,
+          "stddev_ts": 0.313627,
+          "samples_ns": [
+            406493893,
+            405685772,
+            406090665
+          ],
+          "samples_ts": [
+            314.888,
+            315.515,
+            315.201
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:06:24Z",
+          "avg_ns": 3127886995,
+          "stddev_ns": 679169,
+          "avg_ts": 40.922196,
+          "stddev_ts": 0.008825,
+          "samples_ns": [
+            3127243550,
+            3128588849,
+            3127828588
+          ],
+          "samples_ts": [
+            40.9306,
+            40.913,
+            40.923
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 376
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:07:14.297163+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:06:34Z\",\n    \"avg_ns\": 406249643,\n    \"stddev_ns\": 183033,\n    \"avg_ts\": 315.077242,\n    \"stddev_ts\": 0.141083,\n    \"samples_ns\": [ 406440349, 406078012, 406230569 ],\n    \"samples_ts\": [ 314.929, 315.21, 315.092 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:06:35Z\",\n    \"avg_ns\": 12753596509,\n    \"stddev_ns\": 3284529,\n    \"avg_ts\": 40.145540,\n    \"stddev_ts\": 0.010328,\n    \"samples_ns\": [ 12750025965, 12756477699, 12754285865 ],\n    \"samples_ts\": [ 40.1568, 40.1365, 40.1434 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:06:34Z",
+          "avg_ns": 406249643,
+          "stddev_ns": 183033,
+          "avg_ts": 315.077242,
+          "stddev_ts": 0.141083,
+          "samples_ns": [
+            406440349,
+            406078012,
+            406230569
+          ],
+          "samples_ts": [
+            314.929,
+            315.21,
+            315.092
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:06:35Z",
+          "avg_ns": 12753596509,
+          "stddev_ns": 3284529,
+          "avg_ts": 40.14554,
+          "stddev_ts": 0.010328,
+          "samples_ns": [
+            12750025965,
+            12756477699,
+            12754285865
+          ],
+          "samples_ts": [
+            40.1568,
+            40.1365,
+            40.1434
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 377
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:07:31.463378+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:07:15Z\",\n    \"avg_ns\": 1696558698,\n    \"stddev_ns\": 548287,\n    \"avg_ts\": 301.787398,\n    \"stddev_ts\": 0.097546,\n    \"samples_ns\": [ 1696768888, 1695936414, 1696970792 ],\n    \"samples_ts\": [ 301.75, 301.898, 301.714 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:07:21Z\",\n    \"avg_ns\": 3161699723,\n    \"stddev_ns\": 1217876,\n    \"avg_ts\": 40.484557,\n    \"stddev_ts\": 0.015591,\n    \"samples_ns\": [ 3161094215, 3160903276, 3163101678 ],\n    \"samples_ts\": [ 40.4923, 40.4948, 40.4666 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:07:15Z",
+          "avg_ns": 1696558698,
+          "stddev_ns": 548287,
+          "avg_ts": 301.787398,
+          "stddev_ts": 0.097546,
+          "samples_ns": [
+            1696768888,
+            1695936414,
+            1696970792
+          ],
+          "samples_ts": [
+            301.75,
+            301.898,
+            301.714
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:07:21Z",
+          "avg_ns": 3161699723,
+          "stddev_ns": 1217876,
+          "avg_ts": 40.484557,
+          "stddev_ts": 0.015591,
+          "samples_ns": [
+            3161094215,
+            3160903276,
+            3163101678
+          ],
+          "samples_ts": [
+            40.4923,
+            40.4948,
+            40.4666
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 378
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:08:17.491213+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:07:32Z\",\n    \"avg_ns\": 1696019914,\n    \"stddev_ns\": 740207,\n    \"avg_ts\": 301.883286,\n    \"stddev_ts\": 0.131530,\n    \"samples_ns\": [ 1695387324, 1695840141, 1696832278 ],\n    \"samples_ts\": [ 301.996, 301.915, 301.739 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:07:39Z\",\n    \"avg_ns\": 12790883250,\n    \"stddev_ns\": 9623884,\n    \"avg_ts\": 40.028525,\n    \"stddev_ts\": 0.030130,\n    \"samples_ns\": [ 12795833860, 12797024078, 12779791812 ],\n    \"samples_ts\": [ 40.013, 40.0093, 40.0633 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:07:32Z",
+          "avg_ns": 1696019914,
+          "stddev_ns": 740207,
+          "avg_ts": 301.883286,
+          "stddev_ts": 0.13153,
+          "samples_ns": [
+            1695387324,
+            1695840141,
+            1696832278
+          ],
+          "samples_ts": [
+            301.996,
+            301.915,
+            301.739
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:07:39Z",
+          "avg_ns": 12790883250,
+          "stddev_ns": 9623884,
+          "avg_ts": 40.028525,
+          "stddev_ts": 0.03013,
+          "samples_ns": [
+            12795833860,
+            12797024078,
+            12779791812
+          ],
+          "samples_ts": [
+            40.013,
+            40.0093,
+            40.0633
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 379
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:08:29.495896+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:08:18Z\",\n    \"avg_ns\": 406744079,\n    \"stddev_ns\": 69763,\n    \"avg_ts\": 314.694200,\n    \"stddev_ts\": 0.053972,\n    \"samples_ns\": [ 406819938, 406682677, 406729622 ],\n    \"samples_ts\": [ 314.636, 314.742, 314.705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:08:19Z\",\n    \"avg_ns\": 3160749220,\n    \"stddev_ns\": 1118173,\n    \"avg_ts\": 40.496731,\n    \"stddev_ts\": 0.014311,\n    \"samples_ns\": [ 3161154105, 3161607062, 3159486494 ],\n    \"samples_ts\": [ 40.4915, 40.4857, 40.5129 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:08:18Z",
+          "avg_ns": 406744079,
+          "stddev_ns": 69763,
+          "avg_ts": 314.6942,
+          "stddev_ts": 0.053972,
+          "samples_ns": [
+            406819938,
+            406682677,
+            406729622
+          ],
+          "samples_ts": [
+            314.636,
+            314.742,
+            314.705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:08:19Z",
+          "avg_ns": 3160749220,
+          "stddev_ns": 1118173,
+          "avg_ts": 40.496731,
+          "stddev_ts": 0.014311,
+          "samples_ns": [
+            3161154105,
+            3161607062,
+            3159486494
+          ],
+          "samples_ts": [
+            40.4915,
+            40.4857,
+            40.5129
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 380
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:09:10.429220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:08:30Z\",\n    \"avg_ns\": 408026618,\n    \"stddev_ns\": 469621,\n    \"avg_ts\": 313.705300,\n    \"stddev_ts\": 0.360492,\n    \"samples_ns\": [ 408567359, 407785165, 407727331 ],\n    \"samples_ts\": [ 313.29, 313.891, 313.935 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:08:31Z\",\n    \"avg_ns\": 12809842705,\n    \"stddev_ns\": 13047020,\n    \"avg_ts\": 39.969293,\n    \"stddev_ts\": 0.040717,\n    \"samples_ns\": [ 12795904667, 12811860941, 12821762508 ],\n    \"samples_ts\": [ 40.0128, 39.963, 39.9321 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:08:30Z",
+          "avg_ns": 408026618,
+          "stddev_ns": 469621,
+          "avg_ts": 313.7053,
+          "stddev_ts": 0.360492,
+          "samples_ns": [
+            408567359,
+            407785165,
+            407727331
+          ],
+          "samples_ts": [
+            313.29,
+            313.891,
+            313.935
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:08:31Z",
+          "avg_ns": 12809842705,
+          "stddev_ns": 13047020,
+          "avg_ts": 39.969293,
+          "stddev_ts": 0.040717,
+          "samples_ns": [
+            12795904667,
+            12811860941,
+            12821762508
+          ],
+          "samples_ts": [
+            40.0128,
+            39.963,
+            39.9321
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 381
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:09:28.336429+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:09:11Z\",\n    \"avg_ns\": 1880303819,\n    \"stddev_ns\": 2156657,\n    \"avg_ts\": 272.296659,\n    \"stddev_ts\": 0.312331,\n    \"samples_ns\": [ 1880916348, 1877908054, 1882087057 ],\n    \"samples_ts\": [ 272.208, 272.644, 272.038 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:09:18Z\",\n    \"avg_ns\": 3155975042,\n    \"stddev_ns\": 1579620,\n    \"avg_ts\": 40.557995,\n    \"stddev_ts\": 0.020272,\n    \"samples_ns\": [ 3155771308, 3157644634, 3154509186 ],\n    \"samples_ts\": [ 40.5606, 40.5365, 40.5768 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:09:11Z",
+          "avg_ns": 1880303819,
+          "stddev_ns": 2156657,
+          "avg_ts": 272.296659,
+          "stddev_ts": 0.312331,
+          "samples_ns": [
+            1880916348,
+            1877908054,
+            1882087057
+          ],
+          "samples_ts": [
+            272.208,
+            272.644,
+            272.038
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:09:18Z",
+          "avg_ns": 3155975042,
+          "stddev_ns": 1579620,
+          "avg_ts": 40.557995,
+          "stddev_ts": 0.020272,
+          "samples_ns": [
+            3155771308,
+            3157644634,
+            3154509186
+          ],
+          "samples_ts": [
+            40.5606,
+            40.5365,
+            40.5768
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 382
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:10:15.372087+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:09:29Z\",\n    \"avg_ns\": 1881396919,\n    \"stddev_ns\": 1188577,\n    \"avg_ts\": 272.138288,\n    \"stddev_ts\": 0.171752,\n    \"samples_ns\": [ 1880870466, 1882756946, 1880563346 ],\n    \"samples_ts\": [ 272.214, 271.942, 272.259 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:09:36Z\",\n    \"avg_ns\": 12880979133,\n    \"stddev_ns\": 7043717,\n    \"avg_ts\": 39.748539,\n    \"stddev_ts\": 0.021729,\n    \"samples_ns\": [ 12874985704, 12879215577, 12888736119 ],\n    \"samples_ts\": [ 39.767, 39.754, 39.7246 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:09:29Z",
+          "avg_ns": 1881396919,
+          "stddev_ns": 1188577,
+          "avg_ts": 272.138288,
+          "stddev_ts": 0.171752,
+          "samples_ns": [
+            1880870466,
+            1882756946,
+            1880563346
+          ],
+          "samples_ts": [
+            272.214,
+            271.942,
+            272.259
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:09:36Z",
+          "avg_ns": 12880979133,
+          "stddev_ns": 7043717,
+          "avg_ts": 39.748539,
+          "stddev_ts": 0.021729,
+          "samples_ns": [
+            12874985704,
+            12879215577,
+            12888736119
+          ],
+          "samples_ts": [
+            39.767,
+            39.754,
+            39.7246
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 383
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:10:27.269276+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:10:16Z\",\n    \"avg_ns\": 406382910,\n    \"stddev_ns\": 536572,\n    \"avg_ts\": 314.974240,\n    \"stddev_ts\": 0.414996,\n    \"samples_ns\": [ 406012962, 406997362, 406138408 ],\n    \"samples_ts\": [ 315.261, 314.498, 315.163 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:10:17Z\",\n    \"avg_ns\": 3125886796,\n    \"stddev_ns\": 502172,\n    \"avg_ts\": 40.948381,\n    \"stddev_ts\": 0.006496,\n    \"samples_ns\": [ 3126458545, 3125573472, 3125628373 ],\n    \"samples_ts\": [ 40.9409, 40.9525, 40.9518 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:10:16Z",
+          "avg_ns": 406382910,
+          "stddev_ns": 536572,
+          "avg_ts": 314.97424,
+          "stddev_ts": 0.414996,
+          "samples_ns": [
+            406012962,
+            406997362,
+            406138408
+          ],
+          "samples_ts": [
+            315.261,
+            314.498,
+            315.163
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:10:17Z",
+          "avg_ns": 3125886796,
+          "stddev_ns": 502172,
+          "avg_ts": 40.948381,
+          "stddev_ts": 0.006496,
+          "samples_ns": [
+            3126458545,
+            3125573472,
+            3125628373
+          ],
+          "samples_ts": [
+            40.9409,
+            40.9525,
+            40.9518
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 384
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:11:08.486218+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:10:28Z\",\n    \"avg_ns\": 406650866,\n    \"stddev_ns\": 490526,\n    \"avg_ts\": 314.766633,\n    \"stddev_ts\": 0.379253,\n    \"samples_ns\": [ 406564668, 407178359, 406209572 ],\n    \"samples_ts\": [ 314.833, 314.359, 315.108 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:10:29Z\",\n    \"avg_ns\": 12899122155,\n    \"stddev_ns\": 3508375,\n    \"avg_ts\": 39.692626,\n    \"stddev_ts\": 0.010796,\n    \"samples_ns\": [ 12895631702, 12899086580, 12902648183 ],\n    \"samples_ts\": [ 39.7034, 39.6927, 39.6818 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:10:28Z",
+          "avg_ns": 406650866,
+          "stddev_ns": 490526,
+          "avg_ts": 314.766633,
+          "stddev_ts": 0.379253,
+          "samples_ns": [
+            406564668,
+            407178359,
+            406209572
+          ],
+          "samples_ts": [
+            314.833,
+            314.359,
+            315.108
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:10:29Z",
+          "avg_ns": 12899122155,
+          "stddev_ns": 3508375,
+          "avg_ts": 39.692626,
+          "stddev_ts": 0.010796,
+          "samples_ns": [
+            12895631702,
+            12899086580,
+            12902648183
+          ],
+          "samples_ts": [
+            39.7034,
+            39.6927,
+            39.6818
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 385
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:11:25.634331+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:11:09Z\",\n    \"avg_ns\": 1689908195,\n    \"stddev_ns\": 350456,\n    \"avg_ts\": 302.975047,\n    \"stddev_ts\": 0.062837,\n    \"samples_ns\": [ 1690020476, 1689515358, 1690188751 ],\n    \"samples_ts\": [ 302.955, 303.045, 302.925 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:11:16Z\",\n    \"avg_ns\": 3163760953,\n    \"stddev_ns\": 1998077,\n    \"avg_ts\": 40.458188,\n    \"stddev_ts\": 0.025544,\n    \"samples_ns\": [ 3163989555, 3165634102, 3161659203 ],\n    \"samples_ts\": [ 40.4553, 40.4342, 40.4851 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:11:09Z",
+          "avg_ns": 1689908195,
+          "stddev_ns": 350456,
+          "avg_ts": 302.975047,
+          "stddev_ts": 0.062837,
+          "samples_ns": [
+            1690020476,
+            1689515358,
+            1690188751
+          ],
+          "samples_ts": [
+            302.955,
+            303.045,
+            302.925
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:11:16Z",
+          "avg_ns": 3163760953,
+          "stddev_ns": 1998077,
+          "avg_ts": 40.458188,
+          "stddev_ts": 0.025544,
+          "samples_ns": [
+            3163989555,
+            3165634102,
+            3161659203
+          ],
+          "samples_ts": [
+            40.4553,
+            40.4342,
+            40.4851
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 386
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:12:12.025222+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:11:26Z\",\n    \"avg_ns\": 1691047656,\n    \"stddev_ns\": 323954,\n    \"avg_ts\": 302.770895,\n    \"stddev_ts\": 0.057537,\n    \"samples_ns\": [ 1690686963, 1691152633, 1691303373 ],\n    \"samples_ts\": [ 302.835, 302.752, 302.725 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:11:33Z\",\n    \"avg_ns\": 12907942385,\n    \"stddev_ns\": 8617794,\n    \"avg_ts\": 39.665513,\n    \"stddev_ts\": 0.026474,\n    \"samples_ns\": [ 12917079837, 12906782589, 12899964731 ],\n    \"samples_ts\": [ 39.6374, 39.6691, 39.69 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:11:26Z",
+          "avg_ns": 1691047656,
+          "stddev_ns": 323954,
+          "avg_ts": 302.770895,
+          "stddev_ts": 0.057537,
+          "samples_ns": [
+            1690686963,
+            1691152633,
+            1691303373
+          ],
+          "samples_ts": [
+            302.835,
+            302.752,
+            302.725
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:11:33Z",
+          "avg_ns": 12907942385,
+          "stddev_ns": 8617794,
+          "avg_ts": 39.665513,
+          "stddev_ts": 0.026474,
+          "samples_ns": [
+            12917079837,
+            12906782589,
+            12899964731
+          ],
+          "samples_ts": [
+            39.6374,
+            39.6691,
+            39.69
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 387
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:12:24.071817+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:12:12Z\",\n    \"avg_ns\": 406736616,\n    \"stddev_ns\": 161135,\n    \"avg_ts\": 314.700000,\n    \"stddev_ts\": 0.123666,\n    \"samples_ns\": [ 406920143, 406627640, 406662066 ],\n    \"samples_ts\": [ 314.558, 314.784, 314.758 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:12:14Z\",\n    \"avg_ns\": 3174619409,\n    \"stddev_ns\": 46593,\n    \"avg_ts\": 40.319794,\n    \"stddev_ts\": 0.000592,\n    \"samples_ns\": [ 3174610193, 3174578112, 3174669922 ],\n    \"samples_ts\": [ 40.3199, 40.3203, 40.3192 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:12:12Z",
+          "avg_ns": 406736616,
+          "stddev_ns": 161135,
+          "avg_ts": 314.7,
+          "stddev_ts": 0.123666,
+          "samples_ns": [
+            406920143,
+            406627640,
+            406662066
+          ],
+          "samples_ts": [
+            314.558,
+            314.784,
+            314.758
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:12:14Z",
+          "avg_ns": 3174619409,
+          "stddev_ns": 46593,
+          "avg_ts": 40.319794,
+          "stddev_ts": 0.000592,
+          "samples_ns": [
+            3174610193,
+            3174578112,
+            3174669922
+          ],
+          "samples_ts": [
+            40.3199,
+            40.3203,
+            40.3192
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 388
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:13:05.199097+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:12:24Z\",\n    \"avg_ns\": 408950185,\n    \"stddev_ns\": 433017,\n    \"avg_ts\": 312.996790,\n    \"stddev_ts\": 0.330905,\n    \"samples_ns\": [ 408603027, 408812789, 409434740 ],\n    \"samples_ts\": [ 313.262, 313.102, 312.626 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:12:26Z\",\n    \"avg_ns\": 12851774343,\n    \"stddev_ns\": 3039679,\n    \"avg_ts\": 39.838858,\n    \"stddev_ts\": 0.009409,\n    \"samples_ns\": [ 12849045410, 12851233912, 12855043709 ],\n    \"samples_ts\": [ 39.8473, 39.8405, 39.8287 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:12:24Z",
+          "avg_ns": 408950185,
+          "stddev_ns": 433017,
+          "avg_ts": 312.99679,
+          "stddev_ts": 0.330905,
+          "samples_ns": [
+            408603027,
+            408812789,
+            409434740
+          ],
+          "samples_ts": [
+            313.262,
+            313.102,
+            312.626
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:12:26Z",
+          "avg_ns": 12851774343,
+          "stddev_ns": 3039679,
+          "avg_ts": 39.838858,
+          "stddev_ts": 0.009409,
+          "samples_ns": [
+            12849045410,
+            12851233912,
+            12855043709
+          ],
+          "samples_ts": [
+            39.8473,
+            39.8405,
+            39.8287
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 389
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:13:22.457963+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:13:06Z\",\n    \"avg_ns\": 1731407166,\n    \"stddev_ns\": 643921,\n    \"avg_ts\": 295.713254,\n    \"stddev_ts\": 0.109525,\n    \"samples_ns\": [ 1732001168, 1731492976, 1730727356 ],\n    \"samples_ts\": [ 295.612, 295.699, 295.829 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:13:12Z\",\n    \"avg_ns\": 3138398191,\n    \"stddev_ns\": 582251,\n    \"avg_ts\": 40.785138,\n    \"stddev_ts\": 0.007496,\n    \"samples_ns\": [ 3138395870, 3138976185, 3137822520 ],\n    \"samples_ts\": [ 40.7852, 40.7776, 40.7926 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:13:06Z",
+          "avg_ns": 1731407166,
+          "stddev_ns": 643921,
+          "avg_ts": 295.713254,
+          "stddev_ts": 0.109525,
+          "samples_ns": [
+            1732001168,
+            1731492976,
+            1730727356
+          ],
+          "samples_ts": [
+            295.612,
+            295.699,
+            295.829
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:13:12Z",
+          "avg_ns": 3138398191,
+          "stddev_ns": 582251,
+          "avg_ts": 40.785138,
+          "stddev_ts": 0.007496,
+          "samples_ns": [
+            3138395870,
+            3138976185,
+            3137822520
+          ],
+          "samples_ts": [
+            40.7852,
+            40.7776,
+            40.7926
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 390
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:14:09.072489+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:13:23Z\",\n    \"avg_ns\": 1731476305,\n    \"stddev_ns\": 187874,\n    \"avg_ts\": 295.701421,\n    \"stddev_ts\": 0.030472,\n    \"samples_ns\": [ 1731587296, 1731270495, 1731571126 ],\n    \"samples_ts\": [ 295.682, 295.737, 295.685 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:13:30Z\",\n    \"avg_ns\": 12929457442,\n    \"stddev_ns\": 7160967,\n    \"avg_ts\": 39.599504,\n    \"stddev_ts\": 0.021922,\n    \"samples_ns\": [ 12925249398, 12925398203, 12937724726 ],\n    \"samples_ts\": [ 39.6124, 39.6119, 39.5742 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:13:23Z",
+          "avg_ns": 1731476305,
+          "stddev_ns": 187874,
+          "avg_ts": 295.701421,
+          "stddev_ts": 0.030472,
+          "samples_ns": [
+            1731587296,
+            1731270495,
+            1731571126
+          ],
+          "samples_ts": [
+            295.682,
+            295.737,
+            295.685
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:13:30Z",
+          "avg_ns": 12929457442,
+          "stddev_ns": 7160967,
+          "avg_ts": 39.599504,
+          "stddev_ts": 0.021922,
+          "samples_ns": [
+            12925249398,
+            12925398203,
+            12937724726
+          ],
+          "samples_ts": [
+            39.6124,
+            39.6119,
+            39.5742
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 391
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:14:20.955449+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:14:09Z\",\n    \"avg_ns\": 406371025,\n    \"stddev_ns\": 538036,\n    \"avg_ts\": 314.983455,\n    \"stddev_ts\": 0.416753,\n    \"samples_ns\": [ 405831162, 406906451, 406375463 ],\n    \"samples_ts\": [ 315.402, 314.569, 314.98 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:14:11Z\",\n    \"avg_ns\": 3122084052,\n    \"stddev_ns\": 1274640,\n    \"avg_ts\": 40.998260,\n    \"stddev_ts\": 0.016737,\n    \"samples_ns\": [ 3123447089, 3121883476, 3120921591 ],\n    \"samples_ts\": [ 40.9804, 41.0009, 41.0135 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:14:09Z",
+          "avg_ns": 406371025,
+          "stddev_ns": 538036,
+          "avg_ts": 314.983455,
+          "stddev_ts": 0.416753,
+          "samples_ns": [
+            405831162,
+            406906451,
+            406375463
+          ],
+          "samples_ts": [
+            315.402,
+            314.569,
+            314.98
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:14:11Z",
+          "avg_ns": 3122084052,
+          "stddev_ns": 1274640,
+          "avg_ts": 40.99826,
+          "stddev_ts": 0.016737,
+          "samples_ns": [
+            3123447089,
+            3121883476,
+            3120921591
+          ],
+          "samples_ts": [
+            40.9804,
+            41.0009,
+            41.0135
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 392
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:15:01.751143+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:14:21Z\",\n    \"avg_ns\": 406178015,\n    \"stddev_ns\": 257908,\n    \"avg_ts\": 315.132847,\n    \"stddev_ts\": 0.200025,\n    \"samples_ns\": [ 406035847, 406022476, 406475722 ],\n    \"samples_ts\": [ 315.243, 315.253, 314.902 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:14:23Z\",\n    \"avg_ns\": 12757378521,\n    \"stddev_ns\": 8600849,\n    \"avg_ts\": 40.133649,\n    \"stddev_ts\": 0.027062,\n    \"samples_ns\": [ 12748094280, 12758966666, 12765074617 ],\n    \"samples_ts\": [ 40.1629, 40.1286, 40.1094 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:14:21Z",
+          "avg_ns": 406178015,
+          "stddev_ns": 257908,
+          "avg_ts": 315.132847,
+          "stddev_ts": 0.200025,
+          "samples_ns": [
+            406035847,
+            406022476,
+            406475722
+          ],
+          "samples_ts": [
+            315.243,
+            315.253,
+            314.902
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:14:23Z",
+          "avg_ns": 12757378521,
+          "stddev_ns": 8600849,
+          "avg_ts": 40.133649,
+          "stddev_ts": 0.027062,
+          "samples_ns": [
+            12748094280,
+            12758966666,
+            12765074617
+          ],
+          "samples_ts": [
+            40.1629,
+            40.1286,
+            40.1094
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 393
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:15:19.445237+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:15:02Z\",\n    \"avg_ns\": 1831907473,\n    \"stddev_ns\": 1969321,\n    \"avg_ts\": 279.490313,\n    \"stddev_ts\": 0.300201,\n    \"samples_ns\": [ 1834073225, 1831421845, 1830227351 ],\n    \"samples_ts\": [ 279.16, 279.564, 279.747 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:15:09Z\",\n    \"avg_ns\": 3156531698,\n    \"stddev_ns\": 2055951,\n    \"avg_ts\": 40.550848,\n    \"stddev_ts\": 0.026390,\n    \"samples_ns\": [ 3156336504, 3158676745, 3154581847 ],\n    \"samples_ts\": [ 40.5533, 40.5233, 40.5759 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:15:02Z",
+          "avg_ns": 1831907473,
+          "stddev_ns": 1969321,
+          "avg_ts": 279.490313,
+          "stddev_ts": 0.300201,
+          "samples_ns": [
+            1834073225,
+            1831421845,
+            1830227351
+          ],
+          "samples_ts": [
+            279.16,
+            279.564,
+            279.747
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:15:09Z",
+          "avg_ns": 3156531698,
+          "stddev_ns": 2055951,
+          "avg_ts": 40.550848,
+          "stddev_ts": 0.02639,
+          "samples_ns": [
+            3156336504,
+            3158676745,
+            3154581847
+          ],
+          "samples_ts": [
+            40.5533,
+            40.5233,
+            40.5759
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 394
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:16:05.936316+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:15:20Z\",\n    \"avg_ns\": 1826788205,\n    \"stddev_ns\": 1816849,\n    \"avg_ts\": 280.273507,\n    \"stddev_ts\": 0.278751,\n    \"samples_ns\": [ 1826797120, 1828600581, 1824966914 ],\n    \"samples_ts\": [ 280.272, 279.996, 280.553 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:15:27Z\",\n    \"avg_ns\": 12758557499,\n    \"stddev_ns\": 3612667,\n    \"avg_ts\": 40.129931,\n    \"stddev_ts\": 0.011356,\n    \"samples_ns\": [ 12762693143, 12756948964, 12756030391 ],\n    \"samples_ts\": [ 40.1169, 40.135, 40.1379 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:15:20Z",
+          "avg_ns": 1826788205,
+          "stddev_ns": 1816849,
+          "avg_ts": 280.273507,
+          "stddev_ts": 0.278751,
+          "samples_ns": [
+            1826797120,
+            1828600581,
+            1824966914
+          ],
+          "samples_ts": [
+            280.272,
+            279.996,
+            280.553
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:15:27Z",
+          "avg_ns": 12758557499,
+          "stddev_ns": 3612667,
+          "avg_ts": 40.129931,
+          "stddev_ts": 0.011356,
+          "samples_ns": [
+            12762693143,
+            12756948964,
+            12756030391
+          ],
+          "samples_ts": [
+            40.1169,
+            40.135,
+            40.1379
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 395
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:16:18.915335+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:16:06Z\",\n    \"avg_ns\": 325874926,\n    \"stddev_ns\": 456170,\n    \"avg_ts\": 392.789246,\n    \"stddev_ts\": 0.549064,\n    \"samples_ns\": [ 325909378, 325403224, 326312178 ],\n    \"samples_ts\": [ 392.747, 393.358, 392.262 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:16:08Z\",\n    \"avg_ns\": 3591041539,\n    \"stddev_ns\": 8076362,\n    \"avg_ts\": 35.644375,\n    \"stddev_ts\": 0.080095,\n    \"samples_ns\": [ 3584234958, 3599965092, 3588924569 ],\n    \"samples_ts\": [ 35.7119, 35.5559, 35.6653 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:16:06Z",
+          "avg_ns": 325874926,
+          "stddev_ns": 456170,
+          "avg_ts": 392.789246,
+          "stddev_ts": 0.549064,
+          "samples_ns": [
+            325909378,
+            325403224,
+            326312178
+          ],
+          "samples_ts": [
+            392.747,
+            393.358,
+            392.262
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:16:08Z",
+          "avg_ns": 3591041539,
+          "stddev_ns": 8076362,
+          "avg_ts": 35.644375,
+          "stddev_ts": 0.080095,
+          "samples_ns": [
+            3584234958,
+            3599965092,
+            3588924569
+          ],
+          "samples_ts": [
+            35.7119,
+            35.5559,
+            35.6653
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 396
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:17:05.333227+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:16:19Z\",\n    \"avg_ns\": 323689298,\n    \"stddev_ns\": 3090734,\n    \"avg_ts\": 395.464844,\n    \"stddev_ts\": 3.755370,\n    \"samples_ns\": [ 327257056, 321830299, 321980540 ],\n    \"samples_ts\": [ 391.13, 397.725, 397.54 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:16:20Z\",\n    \"avg_ns\": 14742751496,\n    \"stddev_ns\": 20101370,\n    \"avg_ts\": 34.728974,\n    \"stddev_ts\": 0.047359,\n    \"samples_ns\": [ 14744140644, 14721991584, 14762122260 ],\n    \"samples_ts\": [ 34.7257, 34.7779, 34.6834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:16:19Z",
+          "avg_ns": 323689298,
+          "stddev_ns": 3090734,
+          "avg_ts": 395.464844,
+          "stddev_ts": 3.75537,
+          "samples_ns": [
+            327257056,
+            321830299,
+            321980540
+          ],
+          "samples_ts": [
+            391.13,
+            397.725,
+            397.54
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:16:20Z",
+          "avg_ns": 14742751496,
+          "stddev_ns": 20101370,
+          "avg_ts": 34.728974,
+          "stddev_ts": 0.047359,
+          "samples_ns": [
+            14744140644,
+            14721991584,
+            14762122260
+          ],
+          "samples_ts": [
+            34.7257,
+            34.7779,
+            34.6834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 397
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:17:22.453083+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:17:06Z\",\n    \"avg_ns\": 1342820971,\n    \"stddev_ns\": 3502399,\n    \"avg_ts\": 381.288593,\n    \"stddev_ts\": 0.992978,\n    \"samples_ns\": [ 1340408664, 1346837765, 1341216486 ],\n    \"samples_ts\": [ 381.973, 380.15, 381.743 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:17:11Z\",\n    \"avg_ns\": 3608504910,\n    \"stddev_ns\": 7210367,\n    \"avg_ts\": 35.471849,\n    \"stddev_ts\": 0.070796,\n    \"samples_ns\": [ 3605110696, 3616785456, 3603618580 ],\n    \"samples_ts\": [ 35.5052, 35.3905, 35.5199 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:17:06Z",
+          "avg_ns": 1342820971,
+          "stddev_ns": 3502399,
+          "avg_ts": 381.288593,
+          "stddev_ts": 0.992978,
+          "samples_ns": [
+            1340408664,
+            1346837765,
+            1341216486
+          ],
+          "samples_ts": [
+            381.973,
+            380.15,
+            381.743
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:17:11Z",
+          "avg_ns": 3608504910,
+          "stddev_ns": 7210367,
+          "avg_ts": 35.471849,
+          "stddev_ts": 0.070796,
+          "samples_ns": [
+            3605110696,
+            3616785456,
+            3603618580
+          ],
+          "samples_ts": [
+            35.5052,
+            35.3905,
+            35.5199
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 398
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:18:13.473834+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:17:23Z\",\n    \"avg_ns\": 1346247325,\n    \"stddev_ns\": 3632242,\n    \"avg_ts\": 380.318291,\n    \"stddev_ts\": 1.025117,\n    \"samples_ns\": [ 1345417355, 1350222347, 1343102275 ],\n    \"samples_ts\": [ 380.551, 379.197, 381.207 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:17:28Z\",\n    \"avg_ns\": 14912003986,\n    \"stddev_ns\": 5321923,\n    \"avg_ts\": 34.334758,\n    \"stddev_ts\": 0.012245,\n    \"samples_ns\": [ 14909263171, 14918134541, 14908614248 ],\n    \"samples_ts\": [ 34.3411, 34.3206, 34.3426 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:17:23Z",
+          "avg_ns": 1346247325,
+          "stddev_ns": 3632242,
+          "avg_ts": 380.318291,
+          "stddev_ts": 1.025117,
+          "samples_ns": [
+            1345417355,
+            1350222347,
+            1343102275
+          ],
+          "samples_ts": [
+            380.551,
+            379.197,
+            381.207
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:17:28Z",
+          "avg_ns": 14912003986,
+          "stddev_ns": 5321923,
+          "avg_ts": 34.334758,
+          "stddev_ts": 0.012245,
+          "samples_ns": [
+            14909263171,
+            14918134541,
+            14908614248
+          ],
+          "samples_ts": [
+            34.3411,
+            34.3206,
+            34.3426
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 399
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:18:26.444678+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:18:14Z\",\n    \"avg_ns\": 323802311,\n    \"stddev_ns\": 314311,\n    \"avg_ts\": 395.303170,\n    \"stddev_ts\": 0.383113,\n    \"samples_ns\": [ 323816398, 324108827, 323481709 ],\n    \"samples_ts\": [ 395.286, 394.929, 395.695 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:18:15Z\",\n    \"avg_ns\": 3579111755,\n    \"stddev_ns\": 1760417,\n    \"avg_ts\": 35.763069,\n    \"stddev_ts\": 0.017591,\n    \"samples_ns\": [ 3579146864, 3577334046, 3580854355 ],\n    \"samples_ts\": [ 35.7627, 35.7808, 35.7457 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:18:14Z",
+          "avg_ns": 323802311,
+          "stddev_ns": 314311,
+          "avg_ts": 395.30317,
+          "stddev_ts": 0.383113,
+          "samples_ns": [
+            323816398,
+            324108827,
+            323481709
+          ],
+          "samples_ts": [
+            395.286,
+            394.929,
+            395.695
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:18:15Z",
+          "avg_ns": 3579111755,
+          "stddev_ns": 1760417,
+          "avg_ts": 35.763069,
+          "stddev_ts": 0.017591,
+          "samples_ns": [
+            3579146864,
+            3577334046,
+            3580854355
+          ],
+          "samples_ts": [
+            35.7627,
+            35.7808,
+            35.7457
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 400
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:19:13.134775+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:18:27Z\",\n    \"avg_ns\": 329588201,\n    \"stddev_ns\": 535306,\n    \"avg_ts\": 388.364098,\n    \"stddev_ts\": 0.630196,\n    \"samples_ns\": [ 329641678, 330094145, 329028782 ],\n    \"samples_ts\": [ 388.3, 387.768, 389.024 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:18:28Z\",\n    \"avg_ns\": 14822870674,\n    \"stddev_ns\": 15547813,\n    \"avg_ts\": 34.541243,\n    \"stddev_ts\": 0.036208,\n    \"samples_ns\": [ 14813862689, 14840823155, 14813926179 ],\n    \"samples_ts\": [ 34.5622, 34.4994, 34.5621 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:18:27Z",
+          "avg_ns": 329588201,
+          "stddev_ns": 535306,
+          "avg_ts": 388.364098,
+          "stddev_ts": 0.630196,
+          "samples_ns": [
+            329641678,
+            330094145,
+            329028782
+          ],
+          "samples_ts": [
+            388.3,
+            387.768,
+            389.024
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:18:28Z",
+          "avg_ns": 14822870674,
+          "stddev_ns": 15547813,
+          "avg_ts": 34.541243,
+          "stddev_ts": 0.036208,
+          "samples_ns": [
+            14813862689,
+            14840823155,
+            14813926179
+          ],
+          "samples_ts": [
+            34.5622,
+            34.4994,
+            34.5621
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 401
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:19:30.512451+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:19:13Z\",\n    \"avg_ns\": 1401171643,\n    \"stddev_ns\": 2109501,\n    \"avg_ts\": 365.409032,\n    \"stddev_ts\": 0.550427,\n    \"samples_ns\": [ 1398742805, 1402235228, 1402536898 ],\n    \"samples_ts\": [ 366.043, 365.131, 365.053 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:19:19Z\",\n    \"avg_ns\": 3618155475,\n    \"stddev_ns\": 8426346,\n    \"avg_ts\": 35.377270,\n    \"stddev_ts\": 0.082280,\n    \"samples_ns\": [ 3612708079, 3613897476, 3627860871 ],\n    \"samples_ts\": [ 35.4305, 35.4188, 35.2825 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:19:13Z",
+          "avg_ns": 1401171643,
+          "stddev_ns": 2109501,
+          "avg_ts": 365.409032,
+          "stddev_ts": 0.550427,
+          "samples_ns": [
+            1398742805,
+            1402235228,
+            1402536898
+          ],
+          "samples_ts": [
+            366.043,
+            365.131,
+            365.053
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:19:19Z",
+          "avg_ns": 3618155475,
+          "stddev_ns": 8426346,
+          "avg_ts": 35.37727,
+          "stddev_ts": 0.08228,
+          "samples_ns": [
+            3612708079,
+            3613897476,
+            3627860871
+          ],
+          "samples_ts": [
+            35.4305,
+            35.4188,
+            35.2825
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 402
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:20:20.560802+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:19:31Z\",\n    \"avg_ns\": 1362840533,\n    \"stddev_ns\": 1420602,\n    \"avg_ts\": 375.686192,\n    \"stddev_ts\": 0.391498,\n    \"samples_ns\": [ 1362888183, 1364236232, 1361397185 ],\n    \"samples_ts\": [ 375.673, 375.302, 376.084 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:19:36Z\",\n    \"avg_ns\": 14567335517,\n    \"stddev_ns\": 24594108,\n    \"avg_ts\": 35.147194,\n    \"stddev_ts\": 0.059393,\n    \"samples_ns\": [ 14539117721, 14578674469, 14584214362 ],\n    \"samples_ts\": [ 35.2153, 35.1198, 35.1065 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:19:31Z",
+          "avg_ns": 1362840533,
+          "stddev_ns": 1420602,
+          "avg_ts": 375.686192,
+          "stddev_ts": 0.391498,
+          "samples_ns": [
+            1362888183,
+            1364236232,
+            1361397185
+          ],
+          "samples_ts": [
+            375.673,
+            375.302,
+            376.084
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:19:36Z",
+          "avg_ns": 14567335517,
+          "stddev_ns": 24594108,
+          "avg_ts": 35.147194,
+          "stddev_ts": 0.059393,
+          "samples_ns": [
+            14539117721,
+            14578674469,
+            14584214362
+          ],
+          "samples_ts": [
+            35.2153,
+            35.1198,
+            35.1065
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 403
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:20:33.530965+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:20:21Z\",\n    \"avg_ns\": 325282297,\n    \"stddev_ns\": 259409,\n    \"avg_ts\": 393.504520,\n    \"stddev_ts\": 0.313777,\n    \"samples_ns\": [ 325254605, 325554442, 325037844 ],\n    \"samples_ts\": [ 393.538, 393.175, 393.8 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:20:22Z\",\n    \"avg_ns\": 3588297542,\n    \"stddev_ns\": 7956519,\n    \"avg_ts\": 35.671629,\n    \"stddev_ts\": 0.078996,\n    \"samples_ns\": [ 3583469565, 3597480886, 3583942175 ],\n    \"samples_ts\": [ 35.7196, 35.5805, 35.7149 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:20:21Z",
+          "avg_ns": 325282297,
+          "stddev_ns": 259409,
+          "avg_ts": 393.50452,
+          "stddev_ts": 0.313777,
+          "samples_ns": [
+            325254605,
+            325554442,
+            325037844
+          ],
+          "samples_ts": [
+            393.538,
+            393.175,
+            393.8
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:20:22Z",
+          "avg_ns": 3588297542,
+          "stddev_ns": 7956519,
+          "avg_ts": 35.671629,
+          "stddev_ts": 0.078996,
+          "samples_ns": [
+            3583469565,
+            3597480886,
+            3583942175
+          ],
+          "samples_ts": [
+            35.7196,
+            35.5805,
+            35.7149
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 404
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:21:19.362099+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:20:34Z\",\n    \"avg_ns\": 322742771,\n    \"stddev_ns\": 660690,\n    \"avg_ts\": 396.601781,\n    \"stddev_ts\": 0.812132,\n    \"samples_ns\": [ 322807692, 323368604, 322052017 ],\n    \"samples_ts\": [ 396.521, 395.833, 397.451 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:20:35Z\",\n    \"avg_ns\": 14548721320,\n    \"stddev_ns\": 8968225,\n    \"avg_ts\": 35.192105,\n    \"stddev_ts\": 0.021686,\n    \"samples_ns\": [ 14558710676, 14546087596, 14541365689 ],\n    \"samples_ts\": [ 35.1679, 35.1985, 35.2099 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:20:34Z",
+          "avg_ns": 322742771,
+          "stddev_ns": 660690,
+          "avg_ts": 396.601781,
+          "stddev_ts": 0.812132,
+          "samples_ns": [
+            322807692,
+            323368604,
+            322052017
+          ],
+          "samples_ts": [
+            396.521,
+            395.833,
+            397.451
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:20:35Z",
+          "avg_ns": 14548721320,
+          "stddev_ns": 8968225,
+          "avg_ts": 35.192105,
+          "stddev_ts": 0.021686,
+          "samples_ns": [
+            14558710676,
+            14546087596,
+            14541365689
+          ],
+          "samples_ts": [
+            35.1679,
+            35.1985,
+            35.2099
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 405
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:21:37.223410+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:21:20Z\",\n    \"avg_ns\": 1535351070,\n    \"stddev_ns\": 3929197,\n    \"avg_ts\": 333.475676,\n    \"stddev_ts\": 0.853882,\n    \"samples_ns\": [ 1531127005, 1536030108, 1538896099 ],\n    \"samples_ts\": [ 334.394, 333.327, 332.706 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:21:26Z\",\n    \"avg_ns\": 3598928653,\n    \"stddev_ns\": 4323609,\n    \"avg_ts\": 35.566174,\n    \"stddev_ts\": 0.042697,\n    \"samples_ns\": [ 3603790736, 3595519595, 3597475630 ],\n    \"samples_ts\": [ 35.5182, 35.5999, 35.5805 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:21:20Z",
+          "avg_ns": 1535351070,
+          "stddev_ns": 3929197,
+          "avg_ts": 333.475676,
+          "stddev_ts": 0.853882,
+          "samples_ns": [
+            1531127005,
+            1536030108,
+            1538896099
+          ],
+          "samples_ts": [
+            334.394,
+            333.327,
+            332.706
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:21:26Z",
+          "avg_ns": 3598928653,
+          "stddev_ns": 4323609,
+          "avg_ts": 35.566174,
+          "stddev_ts": 0.042697,
+          "samples_ns": [
+            3603790736,
+            3595519595,
+            3597475630
+          ],
+          "samples_ts": [
+            35.5182,
+            35.5999,
+            35.5805
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 406
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:22:28.424474+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:21:38Z\",\n    \"avg_ns\": 1589283507,\n    \"stddev_ns\": 1179100,\n    \"avg_ts\": 322.157869,\n    \"stddev_ts\": 0.238695,\n    \"samples_ns\": [ 1590547181, 1588216417, 1589086925 ],\n    \"samples_ts\": [ 321.902, 322.374, 322.198 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:21:44Z\",\n    \"avg_ns\": 14645973490,\n    \"stddev_ns\": 19797848,\n    \"avg_ts\": 34.958456,\n    \"stddev_ts\": 0.047265,\n    \"samples_ns\": [ 14625203441, 14648088451, 14664628579 ],\n    \"samples_ts\": [ 35.0081, 34.9534, 34.9139 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:21:38Z",
+          "avg_ns": 1589283507,
+          "stddev_ns": 1179100,
+          "avg_ts": 322.157869,
+          "stddev_ts": 0.238695,
+          "samples_ns": [
+            1590547181,
+            1588216417,
+            1589086925
+          ],
+          "samples_ts": [
+            321.902,
+            322.374,
+            322.198
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:21:44Z",
+          "avg_ns": 14645973490,
+          "stddev_ns": 19797848,
+          "avg_ts": 34.958456,
+          "stddev_ts": 0.047265,
+          "samples_ns": [
+            14625203441,
+            14648088451,
+            14664628579
+          ],
+          "samples_ts": [
+            35.0081,
+            34.9534,
+            34.9139
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 407
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:22:41.383799+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:22:29Z\",\n    \"avg_ns\": 322894726,\n    \"stddev_ns\": 1875411,\n    \"avg_ts\": 396.422915,\n    \"stddev_ts\": 2.294651,\n    \"samples_ns\": [ 321905428, 325057455, 321721297 ],\n    \"samples_ts\": [ 397.632, 393.777, 397.86 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:22:30Z\",\n    \"avg_ns\": 3589214919,\n    \"stddev_ns\": 2874965,\n    \"avg_ts\": 35.662410,\n    \"stddev_ts\": 0.028553,\n    \"samples_ns\": [ 3592083034, 3589226093, 3586335632 ],\n    \"samples_ts\": [ 35.6339, 35.6623, 35.691 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:22:29Z",
+          "avg_ns": 322894726,
+          "stddev_ns": 1875411,
+          "avg_ts": 396.422915,
+          "stddev_ts": 2.294651,
+          "samples_ns": [
+            321905428,
+            325057455,
+            321721297
+          ],
+          "samples_ts": [
+            397.632,
+            393.777,
+            397.86
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:22:30Z",
+          "avg_ns": 3589214919,
+          "stddev_ns": 2874965,
+          "avg_ts": 35.66241,
+          "stddev_ts": 0.028553,
+          "samples_ns": [
+            3592083034,
+            3589226093,
+            3586335632
+          ],
+          "samples_ts": [
+            35.6339,
+            35.6623,
+            35.691
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 408
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:23:27.311596+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:22:42Z\",\n    \"avg_ns\": 322701406,\n    \"stddev_ns\": 724023,\n    \"avg_ts\": 396.652838,\n    \"stddev_ts\": 0.888969,\n    \"samples_ns\": [ 322036585, 323472080, 322595555 ],\n    \"samples_ts\": [ 397.47, 395.706, 396.782 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:22:43Z\",\n    \"avg_ns\": 14578874845,\n    \"stddev_ns\": 13058101,\n    \"avg_ts\": 35.119327,\n    \"stddev_ts\": 0.031438,\n    \"samples_ns\": [ 14572756442, 14593867560, 14570000535 ],\n    \"samples_ts\": [ 35.1341, 35.0832, 35.1407 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:22:42Z",
+          "avg_ns": 322701406,
+          "stddev_ns": 724023,
+          "avg_ts": 396.652838,
+          "stddev_ts": 0.888969,
+          "samples_ns": [
+            322036585,
+            323472080,
+            322595555
+          ],
+          "samples_ts": [
+            397.47,
+            395.706,
+            396.782
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:22:43Z",
+          "avg_ns": 14578874845,
+          "stddev_ns": 13058101,
+          "avg_ts": 35.119327,
+          "stddev_ts": 0.031438,
+          "samples_ns": [
+            14572756442,
+            14593867560,
+            14570000535
+          ],
+          "samples_ts": [
+            35.1341,
+            35.0832,
+            35.1407
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 409
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:23:44.208278+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:23:28Z\",\n    \"avg_ns\": 1309177171,\n    \"stddev_ns\": 1934613,\n    \"avg_ts\": 391.085910,\n    \"stddev_ts\": 0.577429,\n    \"samples_ns\": [ 1308009972, 1308111236, 1311410305 ],\n    \"samples_ts\": [ 391.434, 391.404, 390.419 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:23:33Z\",\n    \"avg_ns\": 3581887136,\n    \"stddev_ns\": 5069448,\n    \"avg_ts\": 35.735400,\n    \"stddev_ts\": 0.050528,\n    \"samples_ns\": [ 3578896228, 3579025643, 3587739539 ],\n    \"samples_ts\": [ 35.7652, 35.7639, 35.6771 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:23:28Z",
+          "avg_ns": 1309177171,
+          "stddev_ns": 1934613,
+          "avg_ts": 391.08591,
+          "stddev_ts": 0.577429,
+          "samples_ns": [
+            1308009972,
+            1308111236,
+            1311410305
+          ],
+          "samples_ts": [
+            391.434,
+            391.404,
+            390.419
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:23:33Z",
+          "avg_ns": 3581887136,
+          "stddev_ns": 5069448,
+          "avg_ts": 35.7354,
+          "stddev_ts": 0.050528,
+          "samples_ns": [
+            3578896228,
+            3579025643,
+            3587739539
+          ],
+          "samples_ts": [
+            35.7652,
+            35.7639,
+            35.6771
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 410
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:24:34.337910+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:23:45Z\",\n    \"avg_ns\": 1343360561,\n    \"stddev_ns\": 648049,\n    \"avg_ts\": 381.133773,\n    \"stddev_ts\": 0.183814,\n    \"samples_ns\": [ 1343063950, 1344103833, 1342913900 ],\n    \"samples_ts\": [ 381.218, 380.923, 381.26 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:23:50Z\",\n    \"avg_ns\": 14612327250,\n    \"stddev_ns\": 7775075,\n    \"avg_ts\": 35.038915,\n    \"stddev_ts\": 0.018641,\n    \"samples_ns\": [ 14619663380, 14604180516, 14613137856 ],\n    \"samples_ts\": [ 35.0213, 35.0585, 35.037 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:23:45Z",
+          "avg_ns": 1343360561,
+          "stddev_ns": 648049,
+          "avg_ts": 381.133773,
+          "stddev_ts": 0.183814,
+          "samples_ns": [
+            1343063950,
+            1344103833,
+            1342913900
+          ],
+          "samples_ts": [
+            381.218,
+            380.923,
+            381.26
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:23:50Z",
+          "avg_ns": 14612327250,
+          "stddev_ns": 7775075,
+          "avg_ts": 35.038915,
+          "stddev_ts": 0.018641,
+          "samples_ns": [
+            14619663380,
+            14604180516,
+            14613137856
+          ],
+          "samples_ts": [
+            35.0213,
+            35.0585,
+            35.037
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 411
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:24:47.326565+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:24:35Z\",\n    \"avg_ns\": 321554095,\n    \"stddev_ns\": 511701,\n    \"avg_ts\": 398.067442,\n    \"stddev_ts\": 0.632929,\n    \"samples_ns\": [ 321190147, 321332963, 322139175 ],\n    \"samples_ts\": [ 398.518, 398.341, 397.344 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:24:36Z\",\n    \"avg_ns\": 3598332523,\n    \"stddev_ns\": 9078480,\n    \"avg_ts\": 35.572183,\n    \"stddev_ts\": 0.089662,\n    \"samples_ns\": [ 3590712626, 3595908045, 3608376899 ],\n    \"samples_ts\": [ 35.6475, 35.596, 35.473 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:24:35Z",
+          "avg_ns": 321554095,
+          "stddev_ns": 511701,
+          "avg_ts": 398.067442,
+          "stddev_ts": 0.632929,
+          "samples_ns": [
+            321190147,
+            321332963,
+            322139175
+          ],
+          "samples_ts": [
+            398.518,
+            398.341,
+            397.344
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:24:36Z",
+          "avg_ns": 3598332523,
+          "stddev_ns": 9078480,
+          "avg_ts": 35.572183,
+          "stddev_ts": 0.089662,
+          "samples_ns": [
+            3590712626,
+            3595908045,
+            3608376899
+          ],
+          "samples_ts": [
+            35.6475,
+            35.596,
+            35.473
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 412
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:25:33.183810+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:24:48Z\",\n    \"avg_ns\": 323100442,\n    \"stddev_ns\": 1671314,\n    \"avg_ts\": 396.168686,\n    \"stddev_ts\": 2.044565,\n    \"samples_ns\": [ 321754474, 322575962, 324970892 ],\n    \"samples_ts\": [ 397.819, 396.806, 393.881 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:24:49Z\",\n    \"avg_ns\": 14553435315,\n    \"stddev_ns\": 21298867,\n    \"avg_ts\": 35.180747,\n    \"stddev_ts\": 0.051516,\n    \"samples_ns\": [ 14529703251, 14559713948, 14570888747 ],\n    \"samples_ts\": [ 35.2382, 35.1655, 35.1386 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:24:48Z",
+          "avg_ns": 323100442,
+          "stddev_ns": 1671314,
+          "avg_ts": 396.168686,
+          "stddev_ts": 2.044565,
+          "samples_ns": [
+            321754474,
+            322575962,
+            324970892
+          ],
+          "samples_ts": [
+            397.819,
+            396.806,
+            393.881
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:24:49Z",
+          "avg_ns": 14553435315,
+          "stddev_ns": 21298867,
+          "avg_ts": 35.180747,
+          "stddev_ts": 0.051516,
+          "samples_ns": [
+            14529703251,
+            14559713948,
+            14570888747
+          ],
+          "samples_ts": [
+            35.2382,
+            35.1655,
+            35.1386
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 413
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:25:50.324505+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:25:34Z\",\n    \"avg_ns\": 1363328249,\n    \"stddev_ns\": 1043603,\n    \"avg_ts\": 375.551669,\n    \"stddev_ts\": 0.287423,\n    \"samples_ns\": [ 1362126569, 1363997774, 1363860405 ],\n    \"samples_ts\": [ 375.883, 375.367, 375.405 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:25:39Z\",\n    \"avg_ns\": 3578813439,\n    \"stddev_ns\": 2125657,\n    \"avg_ts\": 35.766053,\n    \"stddev_ts\": 0.021220,\n    \"samples_ns\": [ 3577864221, 3581246492, 3577329606 ],\n    \"samples_ts\": [ 35.7755, 35.7417, 35.7809 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:25:34Z",
+          "avg_ns": 1363328249,
+          "stddev_ns": 1043603,
+          "avg_ts": 375.551669,
+          "stddev_ts": 0.287423,
+          "samples_ns": [
+            1362126569,
+            1363997774,
+            1363860405
+          ],
+          "samples_ts": [
+            375.883,
+            375.367,
+            375.405
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:25:39Z",
+          "avg_ns": 3578813439,
+          "stddev_ns": 2125657,
+          "avg_ts": 35.766053,
+          "stddev_ts": 0.02122,
+          "samples_ns": [
+            3577864221,
+            3581246492,
+            3577329606
+          ],
+          "samples_ts": [
+            35.7755,
+            35.7417,
+            35.7809
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 414
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:26:40.183281+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:25:51Z\",\n    \"avg_ns\": 1338768298,\n    \"stddev_ns\": 2455948,\n    \"avg_ts\": 382.441942,\n    \"stddev_ts\": 0.701273,\n    \"samples_ns\": [ 1336418874, 1338568544, 1341317478 ],\n    \"samples_ts\": [ 383.113, 382.498, 381.714 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:25:56Z\",\n    \"avg_ns\": 14533439195,\n    \"stddev_ns\": 15400823,\n    \"avg_ts\": 35.229128,\n    \"stddev_ts\": 0.037319,\n    \"samples_ns\": [ 14550310750, 14529871512, 14520135323 ],\n    \"samples_ts\": [ 35.1883, 35.2378, 35.2614 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:25:51Z",
+          "avg_ns": 1338768298,
+          "stddev_ns": 2455948,
+          "avg_ts": 382.441942,
+          "stddev_ts": 0.701273,
+          "samples_ns": [
+            1336418874,
+            1338568544,
+            1341317478
+          ],
+          "samples_ts": [
+            383.113,
+            382.498,
+            381.714
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:25:56Z",
+          "avg_ns": 14533439195,
+          "stddev_ns": 15400823,
+          "avg_ts": 35.229128,
+          "stddev_ts": 0.037319,
+          "samples_ns": [
+            14550310750,
+            14529871512,
+            14520135323
+          ],
+          "samples_ts": [
+            35.1883,
+            35.2378,
+            35.2614
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 415
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:26:53.199275+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:26:41Z\",\n    \"avg_ns\": 325710292,\n    \"stddev_ns\": 3924517,\n    \"avg_ts\": 393.025052,\n    \"stddev_ts\": 4.702930,\n    \"samples_ns\": [ 323513803, 323375839, 330241234 ],\n    \"samples_ts\": [ 395.655, 395.824, 387.595 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:26:42Z\",\n    \"avg_ns\": 3589552824,\n    \"stddev_ns\": 3984821,\n    \"avg_ts\": 35.659067,\n    \"stddev_ts\": 0.039583,\n    \"samples_ns\": [ 3593340546, 3585398102, 3589919826 ],\n    \"samples_ts\": [ 35.6214, 35.7004, 35.6554 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:26:41Z",
+          "avg_ns": 325710292,
+          "stddev_ns": 3924517,
+          "avg_ts": 393.025052,
+          "stddev_ts": 4.70293,
+          "samples_ns": [
+            323513803,
+            323375839,
+            330241234
+          ],
+          "samples_ts": [
+            395.655,
+            395.824,
+            387.595
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:26:42Z",
+          "avg_ns": 3589552824,
+          "stddev_ns": 3984821,
+          "avg_ts": 35.659067,
+          "stddev_ts": 0.039583,
+          "samples_ns": [
+            3593340546,
+            3585398102,
+            3589919826
+          ],
+          "samples_ts": [
+            35.6214,
+            35.7004,
+            35.6554
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 416
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:27:38.984350+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:26:53Z\",\n    \"avg_ns\": 322828494,\n    \"stddev_ns\": 1405388,\n    \"avg_ts\": 396.500358,\n    \"stddev_ts\": 1.722035,\n    \"samples_ns\": [ 322182834, 321861960, 324440688 ],\n    \"samples_ts\": [ 397.29, 397.686, 394.525 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:26:55Z\",\n    \"avg_ns\": 14530094028,\n    \"stddev_ns\": 16055650,\n    \"avg_ts\": 35.237240,\n    \"stddev_ts\": 0.038960,\n    \"samples_ns\": [ 14538517349, 14511580091, 14540184645 ],\n    \"samples_ts\": [ 35.2168, 35.2822, 35.2128 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:26:53Z",
+          "avg_ns": 322828494,
+          "stddev_ns": 1405388,
+          "avg_ts": 396.500358,
+          "stddev_ts": 1.722035,
+          "samples_ns": [
+            322182834,
+            321861960,
+            324440688
+          ],
+          "samples_ts": [
+            397.29,
+            397.686,
+            394.525
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:26:55Z",
+          "avg_ns": 14530094028,
+          "stddev_ns": 16055650,
+          "avg_ts": 35.23724,
+          "stddev_ts": 0.03896,
+          "samples_ns": [
+            14538517349,
+            14511580091,
+            14540184645
+          ],
+          "samples_ts": [
+            35.2168,
+            35.2822,
+            35.2128
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 417
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:27:56.577591+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:27:39Z\",\n    \"avg_ns\": 1475038591,\n    \"stddev_ns\": 1136672,\n    \"avg_ts\": 347.109700,\n    \"stddev_ts\": 0.267377,\n    \"samples_ns\": [ 1474553059, 1474225318, 1476337396 ],\n    \"samples_ts\": [ 347.224, 347.301, 346.804 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:27:45Z\",\n    \"avg_ns\": 3594918936,\n    \"stddev_ns\": 10317219,\n    \"avg_ts\": 35.606005,\n    \"stddev_ts\": 0.102140,\n    \"samples_ns\": [ 3593874221, 3605718417, 3585164172 ],\n    \"samples_ts\": [ 35.6162, 35.4992, 35.7027 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:27:39Z",
+          "avg_ns": 1475038591,
+          "stddev_ns": 1136672,
+          "avg_ts": 347.1097,
+          "stddev_ts": 0.267377,
+          "samples_ns": [
+            1474553059,
+            1474225318,
+            1476337396
+          ],
+          "samples_ts": [
+            347.224,
+            347.301,
+            346.804
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:27:45Z",
+          "avg_ns": 3594918936,
+          "stddev_ns": 10317219,
+          "avg_ts": 35.606005,
+          "stddev_ts": 0.10214,
+          "samples_ns": [
+            3593874221,
+            3605718417,
+            3585164172
+          ],
+          "samples_ts": [
+            35.6162,
+            35.4992,
+            35.7027
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 418
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:28:47.247881+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:27:57Z\",\n    \"avg_ns\": 1528117802,\n    \"stddev_ns\": 872050,\n    \"avg_ts\": 335.052776,\n    \"stddev_ts\": 0.191066,\n    \"samples_ns\": [ 1528767342, 1527127812, 1528458253 ],\n    \"samples_ts\": [ 334.91, 335.27, 334.978 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:28:03Z\",\n    \"avg_ns\": 14547583221,\n    \"stddev_ns\": 2812858,\n    \"avg_ts\": 35.194850,\n    \"stddev_ts\": 0.006799,\n    \"samples_ns\": [ 14548483506, 14544433127, 14549833031 ],\n    \"samples_ts\": [ 35.1927, 35.2025, 35.1894 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:27:57Z",
+          "avg_ns": 1528117802,
+          "stddev_ns": 872050,
+          "avg_ts": 335.052776,
+          "stddev_ts": 0.191066,
+          "samples_ns": [
+            1528767342,
+            1527127812,
+            1528458253
+          ],
+          "samples_ts": [
+            334.91,
+            335.27,
+            334.978
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:28:03Z",
+          "avg_ns": 14547583221,
+          "stddev_ns": 2812858,
+          "avg_ts": 35.19485,
+          "stddev_ts": 0.006799,
+          "samples_ns": [
+            14548483506,
+            14544433127,
+            14549833031
+          ],
+          "samples_ts": [
+            35.1927,
+            35.2025,
+            35.1894
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 419
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:29:00.225668+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:28:48Z\",\n    \"avg_ns\": 322462310,\n    \"stddev_ns\": 343973,\n    \"avg_ts\": 396.945915,\n    \"stddev_ts\": 0.422625,\n    \"samples_ns\": [ 322205783, 322328581, 322852567 ],\n    \"samples_ts\": [ 397.262, 397.11, 396.466 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:28:49Z\",\n    \"avg_ns\": 3594080306,\n    \"stddev_ns\": 1638939,\n    \"avg_ts\": 35.614123,\n    \"stddev_ts\": 0.016223,\n    \"samples_ns\": [ 3594876220, 3592197821, 3595166879 ],\n    \"samples_ts\": [ 35.6062, 35.6328, 35.6034 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:28:48Z",
+          "avg_ns": 322462310,
+          "stddev_ns": 343973,
+          "avg_ts": 396.945915,
+          "stddev_ts": 0.422625,
+          "samples_ns": [
+            322205783,
+            322328581,
+            322852567
+          ],
+          "samples_ts": [
+            397.262,
+            397.11,
+            396.466
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:28:49Z",
+          "avg_ns": 3594080306,
+          "stddev_ns": 1638939,
+          "avg_ts": 35.614123,
+          "stddev_ts": 0.016223,
+          "samples_ns": [
+            3594876220,
+            3592197821,
+            3595166879
+          ],
+          "samples_ts": [
+            35.6062,
+            35.6328,
+            35.6034
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 420
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:29:45.981859+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:29:01Z\",\n    \"avg_ns\": 322259424,\n    \"stddev_ns\": 250899,\n    \"avg_ts\": 397.195681,\n    \"stddev_ts\": 0.308414,\n    \"samples_ns\": [ 322022323, 322234918, 322521032 ],\n    \"samples_ts\": [ 397.488, 397.226, 396.873 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:29:02Z\",\n    \"avg_ns\": 14520495134,\n    \"stddev_ns\": 18660751,\n    \"avg_ts\": 35.260544,\n    \"stddev_ts\": 0.045310,\n    \"samples_ns\": [ 14539537507, 14502241814, 14519706082 ],\n    \"samples_ts\": [ 35.2143, 35.3049, 35.2624 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:29:01Z",
+          "avg_ns": 322259424,
+          "stddev_ns": 250899,
+          "avg_ts": 397.195681,
+          "stddev_ts": 0.308414,
+          "samples_ns": [
+            322022323,
+            322234918,
+            322521032
+          ],
+          "samples_ts": [
+            397.488,
+            397.226,
+            396.873
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:29:02Z",
+          "avg_ns": 14520495134,
+          "stddev_ns": 18660751,
+          "avg_ts": 35.260544,
+          "stddev_ts": 0.04531,
+          "samples_ns": [
+            14539537507,
+            14502241814,
+            14519706082
+          ],
+          "samples_ts": [
+            35.2143,
+            35.3049,
+            35.2624
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 421
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:30:02.913683+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:29:46Z\",\n    \"avg_ns\": 1318911537,\n    \"stddev_ns\": 2766434,\n    \"avg_ts\": 388.200032,\n    \"stddev_ts\": 0.813645,\n    \"samples_ns\": [ 1316504835, 1318296186, 1321933591 ],\n    \"samples_ts\": [ 388.909, 388.38, 387.311 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:29:52Z\",\n    \"avg_ns\": 3583190286,\n    \"stddev_ns\": 3907087,\n    \"avg_ts\": 35.722385,\n    \"stddev_ts\": 0.038956,\n    \"samples_ns\": [ 3586768808, 3583779551, 3579022500 ],\n    \"samples_ts\": [ 35.6867, 35.7165, 35.764 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:29:46Z",
+          "avg_ns": 1318911537,
+          "stddev_ns": 2766434,
+          "avg_ts": 388.200032,
+          "stddev_ts": 0.813645,
+          "samples_ns": [
+            1316504835,
+            1318296186,
+            1321933591
+          ],
+          "samples_ts": [
+            388.909,
+            388.38,
+            387.311
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:29:52Z",
+          "avg_ns": 3583190286,
+          "stddev_ns": 3907087,
+          "avg_ts": 35.722385,
+          "stddev_ts": 0.038956,
+          "samples_ns": [
+            3586768808,
+            3583779551,
+            3579022500
+          ],
+          "samples_ts": [
+            35.6867,
+            35.7165,
+            35.764
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 422
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:30:53.105223+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:30:03Z\",\n    \"avg_ns\": 1342082123,\n    \"stddev_ns\": 1121497,\n    \"avg_ts\": 381.496951,\n    \"stddev_ts\": 0.318559,\n    \"samples_ns\": [ 1343285306, 1341893665, 1341067399 ],\n    \"samples_ts\": [ 381.155, 381.55, 381.785 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:30:09Z\",\n    \"avg_ns\": 14638701117,\n    \"stddev_ns\": 8164727,\n    \"avg_ts\": 34.975788,\n    \"stddev_ts\": 0.019504,\n    \"samples_ns\": [ 14631574370, 14647609571, 14636919410 ],\n    \"samples_ts\": [ 34.9928, 34.9545, 34.98 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:30:03Z",
+          "avg_ns": 1342082123,
+          "stddev_ns": 1121497,
+          "avg_ts": 381.496951,
+          "stddev_ts": 0.318559,
+          "samples_ns": [
+            1343285306,
+            1341893665,
+            1341067399
+          ],
+          "samples_ts": [
+            381.155,
+            381.55,
+            381.785
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:30:09Z",
+          "avg_ns": 14638701117,
+          "stddev_ns": 8164727,
+          "avg_ts": 34.975788,
+          "stddev_ts": 0.019504,
+          "samples_ns": [
+            14631574370,
+            14647609571,
+            14636919410
+          ],
+          "samples_ts": [
+            34.9928,
+            34.9545,
+            34.98
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 423
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:31:06.057603+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:30:53Z\",\n    \"avg_ns\": 322985019,\n    \"stddev_ns\": 662143,\n    \"avg_ts\": 396.304321,\n    \"stddev_ts\": 0.812646,\n    \"samples_ns\": [ 322298951, 323035791, 323620315 ],\n    \"samples_ts\": [ 397.147, 396.241, 395.525 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:30:55Z\",\n    \"avg_ns\": 3582985563,\n    \"stddev_ns\": 2426822,\n    \"avg_ts\": 35.724408,\n    \"stddev_ts\": 0.024202,\n    \"samples_ns\": [ 3580342979, 3583499370, 3585114340 ],\n    \"samples_ts\": [ 35.7508, 35.7193, 35.7032 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:30:53Z",
+          "avg_ns": 322985019,
+          "stddev_ns": 662143,
+          "avg_ts": 396.304321,
+          "stddev_ts": 0.812646,
+          "samples_ns": [
+            322298951,
+            323035791,
+            323620315
+          ],
+          "samples_ts": [
+            397.147,
+            396.241,
+            395.525
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:30:55Z",
+          "avg_ns": 3582985563,
+          "stddev_ns": 2426822,
+          "avg_ts": 35.724408,
+          "stddev_ts": 0.024202,
+          "samples_ns": [
+            3580342979,
+            3583499370,
+            3585114340
+          ],
+          "samples_ts": [
+            35.7508,
+            35.7193,
+            35.7032
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 424
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:31:51.997845+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:31:06Z\",\n    \"avg_ns\": 322347331,\n    \"stddev_ns\": 203983,\n    \"avg_ts\": 397.087306,\n    \"stddev_ts\": 0.249244,\n    \"samples_ns\": [ 322200250, 322263584, 322578161 ],\n    \"samples_ts\": [ 397.268, 397.19, 396.803 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:31:08Z\",\n    \"avg_ns\": 14578013228,\n    \"stddev_ns\": 16147001,\n    \"avg_ts\": 35.121413,\n    \"stddev_ts\": 0.038920,\n    \"samples_ns\": [ 14559737966, 14583955927, 14590345793 ],\n    \"samples_ts\": [ 35.1655, 35.1071, 35.0917 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:31:06Z",
+          "avg_ns": 322347331,
+          "stddev_ns": 203983,
+          "avg_ts": 397.087306,
+          "stddev_ts": 0.249244,
+          "samples_ns": [
+            322200250,
+            322263584,
+            322578161
+          ],
+          "samples_ts": [
+            397.268,
+            397.19,
+            396.803
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:31:08Z",
+          "avg_ns": 14578013228,
+          "stddev_ns": 16147001,
+          "avg_ts": 35.121413,
+          "stddev_ts": 0.03892,
+          "samples_ns": [
+            14559737966,
+            14583955927,
+            14590345793
+          ],
+          "samples_ts": [
+            35.1655,
+            35.1071,
+            35.0917
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 425
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:32:09.253504+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:31:52Z\",\n    \"avg_ns\": 1387063227,\n    \"stddev_ns\": 4058953,\n    \"avg_ts\": 369.127311,\n    \"stddev_ts\": 1.078777,\n    \"samples_ns\": [ 1385770819, 1383808024, 1391610839 ],\n    \"samples_ts\": [ 369.469, 369.994, 367.919 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:31:58Z\",\n    \"avg_ns\": 3598527334,\n    \"stddev_ns\": 11692957,\n    \"avg_ts\": 35.570356,\n    \"stddev_ts\": 0.115374,\n    \"samples_ns\": [ 3592922044, 3590692273, 3611967685 ],\n    \"samples_ts\": [ 35.6256, 35.6477, 35.4377 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:31:52Z",
+          "avg_ns": 1387063227,
+          "stddev_ns": 4058953,
+          "avg_ts": 369.127311,
+          "stddev_ts": 1.078777,
+          "samples_ns": [
+            1385770819,
+            1383808024,
+            1391610839
+          ],
+          "samples_ts": [
+            369.469,
+            369.994,
+            367.919
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:31:58Z",
+          "avg_ns": 3598527334,
+          "stddev_ns": 11692957,
+          "avg_ts": 35.570356,
+          "stddev_ts": 0.115374,
+          "samples_ns": [
+            3592922044,
+            3590692273,
+            3611967685
+          ],
+          "samples_ts": [
+            35.6256,
+            35.6477,
+            35.4377
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 426
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:32:59.821883+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:32:10Z\",\n    \"avg_ns\": 1392945052,\n    \"stddev_ns\": 2294719,\n    \"avg_ts\": 367.567211,\n    \"stddev_ts\": 0.606059,\n    \"samples_ns\": [ 1390317711, 1394556289, 1393961156 ],\n    \"samples_ts\": [ 368.261, 367.142, 367.299 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:32:15Z\",\n    \"avg_ns\": 14688339633,\n    \"stddev_ns\": 10850029,\n    \"avg_ts\": 34.857594,\n    \"stddev_ts\": 0.025737,\n    \"samples_ns\": [ 14683411667, 14700778325, 14680828908 ],\n    \"samples_ts\": [ 34.8693, 34.8281, 34.8754 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:32:10Z",
+          "avg_ns": 1392945052,
+          "stddev_ns": 2294719,
+          "avg_ts": 367.567211,
+          "stddev_ts": 0.606059,
+          "samples_ns": [
+            1390317711,
+            1394556289,
+            1393961156
+          ],
+          "samples_ts": [
+            368.261,
+            367.142,
+            367.299
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:32:15Z",
+          "avg_ns": 14688339633,
+          "stddev_ns": 10850029,
+          "avg_ts": 34.857594,
+          "stddev_ts": 0.025737,
+          "samples_ns": [
+            14683411667,
+            14700778325,
+            14680828908
+          ],
+          "samples_ts": [
+            34.8693,
+            34.8281,
+            34.8754
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 427
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:33:12.823270+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:33:00Z\",\n    \"avg_ns\": 324752071,\n    \"stddev_ns\": 3224536,\n    \"avg_ts\": 394.172766,\n    \"stddev_ts\": 3.917837,\n    \"samples_ns\": [ 327865403, 321426787, 324964023 ],\n    \"samples_ts\": [ 390.404, 398.224, 393.89 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:33:01Z\",\n    \"avg_ns\": 3587417776,\n    \"stddev_ns\": 7276433,\n    \"avg_ts\": 35.680358,\n    \"stddev_ts\": 0.072350,\n    \"samples_ns\": [ 3586790195, 3594987427, 3580475707 ],\n    \"samples_ts\": [ 35.6865, 35.6051, 35.7494 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:33:00Z",
+          "avg_ns": 324752071,
+          "stddev_ns": 3224536,
+          "avg_ts": 394.172766,
+          "stddev_ts": 3.917837,
+          "samples_ns": [
+            327865403,
+            321426787,
+            324964023
+          ],
+          "samples_ts": [
+            390.404,
+            398.224,
+            393.89
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:33:01Z",
+          "avg_ns": 3587417776,
+          "stddev_ns": 7276433,
+          "avg_ts": 35.680358,
+          "stddev_ts": 0.07235,
+          "samples_ns": [
+            3586790195,
+            3594987427,
+            3580475707
+          ],
+          "samples_ts": [
+            35.6865,
+            35.6051,
+            35.7494
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 428
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:33:58.705531+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:33:13Z\",\n    \"avg_ns\": 324072172,\n    \"stddev_ns\": 2585552,\n    \"avg_ts\": 394.990434,\n    \"stddev_ts\": 3.137343,\n    \"samples_ns\": [ 327047860, 322374465, 322794191 ],\n    \"samples_ts\": [ 391.38, 397.054, 396.537 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:33:14Z\",\n    \"avg_ns\": 14558231212,\n    \"stddev_ns\": 5728306,\n    \"avg_ts\": 35.169111,\n    \"stddev_ts\": 0.013838,\n    \"samples_ns\": [ 14562543091, 14560417475, 14551733071 ],\n    \"samples_ts\": [ 35.1587, 35.1638, 35.1848 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:33:13Z",
+          "avg_ns": 324072172,
+          "stddev_ns": 2585552,
+          "avg_ts": 394.990434,
+          "stddev_ts": 3.137343,
+          "samples_ns": [
+            327047860,
+            322374465,
+            322794191
+          ],
+          "samples_ts": [
+            391.38,
+            397.054,
+            396.537
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:33:14Z",
+          "avg_ns": 14558231212,
+          "stddev_ns": 5728306,
+          "avg_ts": 35.169111,
+          "stddev_ts": 0.013838,
+          "samples_ns": [
+            14562543091,
+            14560417475,
+            14551733071
+          ],
+          "samples_ts": [
+            35.1587,
+            35.1638,
+            35.1848
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 429
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:34:16.548442+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:33:59Z\",\n    \"avg_ns\": 1536336889,\n    \"stddev_ns\": 1887398,\n    \"avg_ts\": 333.260575,\n    \"stddev_ts\": 0.409109,\n    \"samples_ns\": [ 1536010064, 1534635071, 1538365534 ],\n    \"samples_ts\": [ 333.331, 333.63, 332.821 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:34:05Z\",\n    \"avg_ns\": 3596991173,\n    \"stddev_ns\": 1549159,\n    \"avg_ts\": 35.585302,\n    \"stddev_ts\": 0.015327,\n    \"samples_ns\": [ 3598427087, 3595349361, 3597197071 ],\n    \"samples_ts\": [ 35.5711, 35.6015, 35.5833 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:33:59Z",
+          "avg_ns": 1536336889,
+          "stddev_ns": 1887398,
+          "avg_ts": 333.260575,
+          "stddev_ts": 0.409109,
+          "samples_ns": [
+            1536010064,
+            1534635071,
+            1538365534
+          ],
+          "samples_ts": [
+            333.331,
+            333.63,
+            332.821
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:34:05Z",
+          "avg_ns": 3596991173,
+          "stddev_ns": 1549159,
+          "avg_ts": 35.585302,
+          "stddev_ts": 0.015327,
+          "samples_ns": [
+            3598427087,
+            3595349361,
+            3597197071
+          ],
+          "samples_ts": [
+            35.5711,
+            35.6015,
+            35.5833
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 430
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:35:07.755610+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:34:17Z\",\n    \"avg_ns\": 1531468358,\n    \"stddev_ns\": 1659091,\n    \"avg_ts\": 334.319934,\n    \"stddev_ts\": 0.362063,\n    \"samples_ns\": [ 1531711224, 1529702151, 1532991701 ],\n    \"samples_ts\": [ 334.267, 334.706, 333.987 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 270M Q2_K - Medium\",\n    \"model_size\": 230552064,\n    \"model_n_params\": 268098176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:34:23Z\",\n    \"avg_ns\": 14721699820,\n    \"stddev_ns\": 24190581,\n    \"avg_ts\": 34.778655,\n    \"stddev_ts\": 0.057095,\n    \"samples_ns\": [ 14749530804, 14705723560, 14709845097 ],\n    \"samples_ts\": [ 34.713, 34.8164, 34.8066 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:34:17Z",
+          "avg_ns": 1531468358,
+          "stddev_ns": 1659091,
+          "avg_ts": 334.319934,
+          "stddev_ts": 0.362063,
+          "samples_ns": [
+            1531711224,
+            1529702151,
+            1532991701
+          ],
+          "samples_ts": [
+            334.267,
+            334.706,
+            333.987
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+          "model_type": "gemma3 270M Q2_K - Medium",
+          "model_size": 230552064,
+          "model_n_params": 268098176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:34:23Z",
+          "avg_ns": 14721699820,
+          "stddev_ns": 24190581,
+          "avg_ts": 34.778655,
+          "stddev_ts": 0.057095,
+          "samples_ns": [
+            14749530804,
+            14705723560,
+            14709845097
+          ],
+          "samples_ts": [
+            34.713,
+            34.8164,
+            34.8066
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-270M-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 431
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:36:49.427521+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:35:17Z\",\n    \"avg_ns\": 11022977017,\n    \"stddev_ns\": 18402675,\n    \"avg_ts\": 11.612130,\n    \"stddev_ts\": 0.019376,\n    \"samples_ns\": [ 11019054573, 11043024374, 11006852105 ],\n    \"samples_ts\": [ 11.6162, 11.591, 11.6291 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:36:01Z\",\n    \"avg_ns\": 15779086672,\n    \"stddev_ns\": 11387911,\n    \"avg_ts\": 8.112006,\n    \"stddev_ts\": 0.005853,\n    \"samples_ns\": [ 15777086004, 15791342339, 15768831673 ],\n    \"samples_ts\": [ 8.11303, 8.10571, 8.11728 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:35:17Z",
+          "avg_ns": 11022977017,
+          "stddev_ns": 18402675,
+          "avg_ts": 11.61213,
+          "stddev_ts": 0.019376,
+          "samples_ns": [
+            11019054573,
+            11043024374,
+            11006852105
+          ],
+          "samples_ts": [
+            11.6162,
+            11.591,
+            11.6291
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:36:01Z",
+          "avg_ns": 15779086672,
+          "stddev_ns": 11387911,
+          "avg_ts": 8.112006,
+          "stddev_ts": 0.005853,
+          "samples_ns": [
+            15777086004,
+            15791342339,
+            15768831673
+          ],
+          "samples_ts": [
+            8.11303,
+            8.10571,
+            8.11728
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 432
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:40:44.683544+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:36:50Z\",\n    \"avg_ns\": 10979671982,\n    \"stddev_ns\": 646581,\n    \"avg_ts\": 11.657907,\n    \"stddev_ts\": 0.000687,\n    \"samples_ns\": [ 10980150916, 10979928534, 10978936496 ],\n    \"samples_ts\": [ 11.6574, 11.6576, 11.6587 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:37:34Z\",\n    \"avg_ns\": 63423023116,\n    \"stddev_ns\": 2652065,\n    \"avg_ts\": 8.072778,\n    \"stddev_ts\": 0.000335,\n    \"samples_ns\": [ 63420127605, 63425257393, 63423684352 ],\n    \"samples_ts\": [ 8.07315, 8.07249, 8.07269 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:36:50Z",
+          "avg_ns": 10979671982,
+          "stddev_ns": 646581,
+          "avg_ts": 11.657907,
+          "stddev_ts": 0.000687,
+          "samples_ns": [
+            10980150916,
+            10979928534,
+            10978936496
+          ],
+          "samples_ts": [
+            11.6574,
+            11.6576,
+            11.6587
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:37:34Z",
+          "avg_ns": 63423023116,
+          "stddev_ns": 2652065,
+          "avg_ts": 8.072778,
+          "stddev_ts": 0.000335,
+          "samples_ns": [
+            63420127605,
+            63425257393,
+            63423684352
+          ],
+          "samples_ts": [
+            8.07315,
+            8.07249,
+            8.07269
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 433
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:44:29.436553+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:40:45Z\",\n    \"avg_ns\": 44157705793,\n    \"stddev_ns\": 2531029,\n    \"avg_ts\": 11.594805,\n    \"stddev_ts\": 0.000660,\n    \"samples_ns\": [ 44158536822, 44159698565, 44154881994 ],\n    \"samples_ts\": [ 11.5946, 11.5943, 11.5955 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:43:42Z\",\n    \"avg_ns\": 15690445977,\n    \"stddev_ns\": 1688104,\n    \"avg_ts\": 8.157831,\n    \"stddev_ts\": 0.000873,\n    \"samples_ns\": [ 15691912548, 15690810506, 15688614879 ],\n    \"samples_ts\": [ 8.15707, 8.15764, 8.15878 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:40:45Z",
+          "avg_ns": 44157705793,
+          "stddev_ns": 2531029,
+          "avg_ts": 11.594805,
+          "stddev_ts": 0.00066,
+          "samples_ns": [
+            44158536822,
+            44159698565,
+            44154881994
+          ],
+          "samples_ts": [
+            11.5946,
+            11.5943,
+            11.5955
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:43:42Z",
+          "avg_ns": 15690445977,
+          "stddev_ns": 1688104,
+          "avg_ts": 8.157831,
+          "stddev_ts": 0.000873,
+          "samples_ns": [
+            15691912548,
+            15690810506,
+            15688614879
+          ],
+          "samples_ts": [
+            8.15707,
+            8.15764,
+            8.15878
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 434
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:50:37.553793+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:44:30Z\",\n    \"avg_ns\": 44174972181,\n    \"stddev_ns\": 766610,\n    \"avg_ts\": 11.590273,\n    \"stddev_ts\": 0.000193,\n    \"samples_ns\": [ 44174314821, 44175769289, 44174832434 ],\n    \"samples_ts\": [ 11.5904, 11.5901, 11.5903 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:47:26Z\",\n    \"avg_ns\": 63456755748,\n    \"stddev_ns\": 1654665,\n    \"avg_ts\": 8.068487,\n    \"stddev_ts\": 0.000208,\n    \"samples_ns\": [ 63458642734, 63455874898, 63455749613 ],\n    \"samples_ts\": [ 8.06825, 8.0686, 8.06861 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:44:30Z",
+          "avg_ns": 44174972181,
+          "stddev_ns": 766610,
+          "avg_ts": 11.590273,
+          "stddev_ts": 0.000193,
+          "samples_ns": [
+            44174314821,
+            44175769289,
+            44174832434
+          ],
+          "samples_ts": [
+            11.5904,
+            11.5901,
+            11.5903
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:47:26Z",
+          "avg_ns": 63456755748,
+          "stddev_ns": 1654665,
+          "avg_ts": 8.068487,
+          "stddev_ts": 0.000208,
+          "samples_ns": [
+            63458642734,
+            63455874898,
+            63455749613
+          ],
+          "samples_ts": [
+            8.06825,
+            8.0686,
+            8.06861
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 435
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:52:09.830068+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:50:38Z\",\n    \"avg_ns\": 10979588178,\n    \"stddev_ns\": 1091666,\n    \"avg_ts\": 11.657997,\n    \"stddev_ts\": 0.001148,\n    \"samples_ns\": [ 10979294827, 10978683553, 10980786156 ],\n    \"samples_ts\": [ 11.6583, 11.659, 11.6567 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:51:22Z\",\n    \"avg_ns\": 15770652865,\n    \"stddev_ns\": 591393,\n    \"avg_ts\": 8.116341,\n    \"stddev_ts\": 0.000304,\n    \"samples_ns\": [ 15771273638, 15770096051, 15770588906 ],\n    \"samples_ts\": [ 8.11602, 8.11663, 8.11637 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:50:38Z",
+          "avg_ns": 10979588178,
+          "stddev_ns": 1091666,
+          "avg_ts": 11.657997,
+          "stddev_ts": 0.001148,
+          "samples_ns": [
+            10979294827,
+            10978683553,
+            10980786156
+          ],
+          "samples_ts": [
+            11.6583,
+            11.659,
+            11.6567
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:51:22Z",
+          "avg_ns": 15770652865,
+          "stddev_ns": 591393,
+          "avg_ts": 8.116341,
+          "stddev_ts": 0.000304,
+          "samples_ns": [
+            15771273638,
+            15770096051,
+            15770588906
+          ],
+          "samples_ts": [
+            8.11602,
+            8.11663,
+            8.11637
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 436
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:56:05.646109+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:52:10Z\",\n    \"avg_ns\": 10983969630,\n    \"stddev_ns\": 1034806,\n    \"avg_ts\": 11.653346,\n    \"stddev_ts\": 0.001087,\n    \"samples_ns\": [ 10984072000, 10984938739, 10982898153 ],\n    \"samples_ts\": [ 11.6532, 11.6523, 11.6545 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:52:54Z\",\n    \"avg_ns\": 63609411151,\n    \"stddev_ns\": 757466,\n    \"avg_ts\": 8.049123,\n    \"stddev_ts\": 0.000096,\n    \"samples_ns\": [ 63609155414, 63610263384, 63608814655 ],\n    \"samples_ts\": [ 8.04916, 8.04902, 8.0492 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:52:10Z",
+          "avg_ns": 10983969630,
+          "stddev_ns": 1034806,
+          "avg_ts": 11.653346,
+          "stddev_ts": 0.001087,
+          "samples_ns": [
+            10984072000,
+            10984938739,
+            10982898153
+          ],
+          "samples_ts": [
+            11.6532,
+            11.6523,
+            11.6545
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:52:54Z",
+          "avg_ns": 63609411151,
+          "stddev_ns": 757466,
+          "avg_ts": 8.049123,
+          "stddev_ts": 9.6e-05,
+          "samples_ns": [
+            63609155414,
+            63610263384,
+            63608814655
+          ],
+          "samples_ts": [
+            8.04916,
+            8.04902,
+            8.0492
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 437
+    },
+    {
+      "timestamp_utc": "2025-12-09T00:59:50.439141+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:56:06Z\",\n    \"avg_ns\": 44163994825,\n    \"stddev_ns\": 819910,\n    \"avg_ts\": 11.593154,\n    \"stddev_ts\": 0.000215,\n    \"samples_ns\": [ 44164029762, 44164796709, 44163158004 ],\n    \"samples_ts\": [ 11.5931, 11.5929, 11.5934 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:59:03Z\",\n    \"avg_ns\": 15680538362,\n    \"stddev_ns\": 485862,\n    \"avg_ts\": 8.162985,\n    \"stddev_ts\": 0.000244,\n    \"samples_ns\": [ 15681078749, 15680305161, 15680231177 ],\n    \"samples_ts\": [ 8.1627, 8.16311, 8.16314 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:56:06Z",
+          "avg_ns": 44163994825,
+          "stddev_ns": 819910,
+          "avg_ts": 11.593154,
+          "stddev_ts": 0.000215,
+          "samples_ns": [
+            44164029762,
+            44164796709,
+            44163158004
+          ],
+          "samples_ts": [
+            11.5931,
+            11.5929,
+            11.5934
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:59:03Z",
+          "avg_ns": 15680538362,
+          "stddev_ns": 485862,
+          "avg_ts": 8.162985,
+          "stddev_ts": 0.000244,
+          "samples_ns": [
+            15681078749,
+            15680305161,
+            15680231177
+          ],
+          "samples_ts": [
+            8.1627,
+            8.16311,
+            8.16314
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 438
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:05:58.201115+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T00:59:51Z\",\n    \"avg_ns\": 44172862291,\n    \"stddev_ns\": 2964244,\n    \"avg_ts\": 11.590827,\n    \"stddev_ts\": 0.000778,\n    \"samples_ns\": [ 44176252586, 44170759503, 44171574784 ],\n    \"samples_ts\": [ 11.5899, 11.5914, 11.5912 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:02:48Z\",\n    \"avg_ns\": 63324162500,\n    \"stddev_ns\": 3417222,\n    \"avg_ts\": 8.085381,\n    \"stddev_ts\": 0.000434,\n    \"samples_ns\": [ 63326973491, 63320385401, 63325128610 ],\n    \"samples_ts\": [ 8.08502, 8.08586, 8.08526 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T00:59:51Z",
+          "avg_ns": 44172862291,
+          "stddev_ns": 2964244,
+          "avg_ts": 11.590827,
+          "stddev_ts": 0.000778,
+          "samples_ns": [
+            44176252586,
+            44170759503,
+            44171574784
+          ],
+          "samples_ts": [
+            11.5899,
+            11.5914,
+            11.5912
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:02:48Z",
+          "avg_ns": 63324162500,
+          "stddev_ns": 3417222,
+          "avg_ts": 8.085381,
+          "stddev_ts": 0.000434,
+          "samples_ns": [
+            63326973491,
+            63320385401,
+            63325128610
+          ],
+          "samples_ts": [
+            8.08502,
+            8.08586,
+            8.08526
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 439
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:07:30.458184+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:05:59Z\",\n    \"avg_ns\": 10979049225,\n    \"stddev_ns\": 749696,\n    \"avg_ts\": 11.658569,\n    \"stddev_ts\": 0.000788,\n    \"samples_ns\": [ 10979903103, 10978557203, 10978687370 ],\n    \"samples_ts\": [ 11.6577, 11.6591, 11.659 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:06:42Z\",\n    \"avg_ns\": 15760287870,\n    \"stddev_ns\": 1339820,\n    \"avg_ts\": 8.121679,\n    \"stddev_ts\": 0.000687,\n    \"samples_ns\": [ 15760078172, 15759071213, 15761714226 ],\n    \"samples_ts\": [ 8.12179, 8.12231, 8.12094 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:05:59Z",
+          "avg_ns": 10979049225,
+          "stddev_ns": 749696,
+          "avg_ts": 11.658569,
+          "stddev_ts": 0.000788,
+          "samples_ns": [
+            10979903103,
+            10978557203,
+            10978687370
+          ],
+          "samples_ts": [
+            11.6577,
+            11.6591,
+            11.659
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:06:42Z",
+          "avg_ns": 15760287870,
+          "stddev_ns": 1339820,
+          "avg_ts": 8.121679,
+          "stddev_ts": 0.000687,
+          "samples_ns": [
+            15760078172,
+            15759071213,
+            15761714226
+          ],
+          "samples_ts": [
+            8.12179,
+            8.12231,
+            8.12094
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 440
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:11:25.781124+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:07:31Z\",\n    \"avg_ns\": 10978729294,\n    \"stddev_ns\": 1182006,\n    \"avg_ts\": 11.658909,\n    \"stddev_ts\": 0.001245,\n    \"samples_ns\": [ 10978667821, 10979931504, 10977588559 ],\n    \"samples_ts\": [ 11.659, 11.6576, 11.6601 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:08:15Z\",\n    \"avg_ns\": 63450406255,\n    \"stddev_ns\": 2026402,\n    \"avg_ts\": 8.069294,\n    \"stddev_ts\": 0.000258,\n    \"samples_ns\": [ 63448945424, 63449553703, 63452719638 ],\n    \"samples_ts\": [ 8.06948, 8.0694, 8.069 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:07:31Z",
+          "avg_ns": 10978729294,
+          "stddev_ns": 1182006,
+          "avg_ts": 11.658909,
+          "stddev_ts": 0.001245,
+          "samples_ns": [
+            10978667821,
+            10979931504,
+            10977588559
+          ],
+          "samples_ts": [
+            11.659,
+            11.6576,
+            11.6601
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:08:15Z",
+          "avg_ns": 63450406255,
+          "stddev_ns": 2026402,
+          "avg_ts": 8.069294,
+          "stddev_ts": 0.000258,
+          "samples_ns": [
+            63448945424,
+            63449553703,
+            63452719638
+          ],
+          "samples_ts": [
+            8.06948,
+            8.0694,
+            8.069
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 441
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:15:12.242188+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:11:26Z\",\n    \"avg_ns\": 44571205395,\n    \"stddev_ns\": 1041669,\n    \"avg_ts\": 11.487237,\n    \"stddev_ts\": 0.000257,\n    \"samples_ns\": [ 44571430992, 44572071251, 44570113944 ],\n    \"samples_ts\": [ 11.4872, 11.487, 11.4875 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:14:24Z\",\n    \"avg_ns\": 15707462552,\n    \"stddev_ns\": 1458659,\n    \"avg_ts\": 8.148993,\n    \"stddev_ts\": 0.000757,\n    \"samples_ns\": [ 15707996633, 15708578897, 15705812126 ],\n    \"samples_ts\": [ 8.14872, 8.14841, 8.14985 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:11:26Z",
+          "avg_ns": 44571205395,
+          "stddev_ns": 1041669,
+          "avg_ts": 11.487237,
+          "stddev_ts": 0.000257,
+          "samples_ns": [
+            44571430992,
+            44572071251,
+            44570113944
+          ],
+          "samples_ts": [
+            11.4872,
+            11.487,
+            11.4875
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:14:24Z",
+          "avg_ns": 15707462552,
+          "stddev_ns": 1458659,
+          "avg_ts": 8.148993,
+          "stddev_ts": 0.000757,
+          "samples_ns": [
+            15707996633,
+            15708578897,
+            15705812126
+          ],
+          "samples_ts": [
+            8.14872,
+            8.14841,
+            8.14985
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 442
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:21:21.301399+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:15:13Z\",\n    \"avg_ns\": 44528824822,\n    \"stddev_ns\": 1838079,\n    \"avg_ts\": 11.498170,\n    \"stddev_ts\": 0.000468,\n    \"samples_ns\": [ 44530120765, 44529601590, 44526752113 ],\n    \"samples_ts\": [ 11.4978, 11.498, 11.4987 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:18:11Z\",\n    \"avg_ns\": 63294550717,\n    \"stddev_ns\": 3115813,\n    \"avg_ts\": 8.089164,\n    \"stddev_ts\": 0.000397,\n    \"samples_ns\": [ 63298072551, 63293375048, 63292204553 ],\n    \"samples_ts\": [ 8.08871, 8.08931, 8.08946 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:15:13Z",
+          "avg_ns": 44528824822,
+          "stddev_ns": 1838079,
+          "avg_ts": 11.49817,
+          "stddev_ts": 0.000468,
+          "samples_ns": [
+            44530120765,
+            44529601590,
+            44526752113
+          ],
+          "samples_ts": [
+            11.4978,
+            11.498,
+            11.4987
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:18:11Z",
+          "avg_ns": 63294550717,
+          "stddev_ns": 3115813,
+          "avg_ts": 8.089164,
+          "stddev_ts": 0.000397,
+          "samples_ns": [
+            63298072551,
+            63293375048,
+            63292204553
+          ],
+          "samples_ts": [
+            8.08871,
+            8.08931,
+            8.08946
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 443
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:22:53.360610+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:21:22Z\",\n    \"avg_ns\": 10979345018,\n    \"stddev_ns\": 719707,\n    \"avg_ts\": 11.658255,\n    \"stddev_ts\": 0.000748,\n    \"samples_ns\": [ 10978569601, 10979520440, 10979945015 ],\n    \"samples_ts\": [ 11.6591, 11.6581, 11.6576 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:22:06Z\",\n    \"avg_ns\": 15697248414,\n    \"stddev_ns\": 1239686,\n    \"avg_ts\": 8.154295,\n    \"stddev_ts\": 0.000644,\n    \"samples_ns\": [ 15698679318, 15696567717, 15696498207 ],\n    \"samples_ts\": [ 8.15355, 8.15465, 8.15469 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:21:22Z",
+          "avg_ns": 10979345018,
+          "stddev_ns": 719707,
+          "avg_ts": 11.658255,
+          "stddev_ts": 0.000748,
+          "samples_ns": [
+            10978569601,
+            10979520440,
+            10979945015
+          ],
+          "samples_ts": [
+            11.6591,
+            11.6581,
+            11.6576
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:22:06Z",
+          "avg_ns": 15697248414,
+          "stddev_ns": 1239686,
+          "avg_ts": 8.154295,
+          "stddev_ts": 0.000644,
+          "samples_ns": [
+            15698679318,
+            15696567717,
+            15696498207
+          ],
+          "samples_ts": [
+            8.15355,
+            8.15465,
+            8.15469
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 444
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:26:49.185757+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:22:54Z\",\n    \"avg_ns\": 10978693419,\n    \"stddev_ns\": 742099,\n    \"avg_ts\": 11.658947,\n    \"stddev_ts\": 0.000772,\n    \"samples_ns\": [ 10979511414, 10978448492, 10978120353 ],\n    \"samples_ts\": [ 11.6581, 11.6592, 11.6596 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:23:38Z\",\n    \"avg_ns\": 63616565520,\n    \"stddev_ns\": 4631889,\n    \"avg_ts\": 8.048218,\n    \"stddev_ts\": 0.000586,\n    \"samples_ns\": [ 63621772655, 63615019643, 63612904262 ],\n    \"samples_ts\": [ 8.04756, 8.04841, 8.04868 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:22:54Z",
+          "avg_ns": 10978693419,
+          "stddev_ns": 742099,
+          "avg_ts": 11.658947,
+          "stddev_ts": 0.000772,
+          "samples_ns": [
+            10979511414,
+            10978448492,
+            10978120353
+          ],
+          "samples_ts": [
+            11.6581,
+            11.6592,
+            11.6596
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:23:38Z",
+          "avg_ns": 63616565520,
+          "stddev_ns": 4631889,
+          "avg_ts": 8.048218,
+          "stddev_ts": 0.000586,
+          "samples_ns": [
+            63621772655,
+            63615019643,
+            63612904262
+          ],
+          "samples_ts": [
+            8.04756,
+            8.04841,
+            8.04868
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 445
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:30:33.935965+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:26:50Z\",\n    \"avg_ns\": 44165568390,\n    \"stddev_ns\": 4523972,\n    \"avg_ts\": 11.592741,\n    \"stddev_ts\": 0.001187,\n    \"samples_ns\": [ 44169857071, 44166007021, 44160841078 ],\n    \"samples_ts\": [ 11.5916, 11.5926, 11.594 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:29:46Z\",\n    \"avg_ns\": 15676105994,\n    \"stddev_ns\": 1326118,\n    \"avg_ts\": 8.165293,\n    \"stddev_ts\": 0.000688,\n    \"samples_ns\": [ 15677410860, 15676136137, 15674770986 ],\n    \"samples_ts\": [ 8.16461, 8.16528, 8.16599 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:26:50Z",
+          "avg_ns": 44165568390,
+          "stddev_ns": 4523972,
+          "avg_ts": 11.592741,
+          "stddev_ts": 0.001187,
+          "samples_ns": [
+            44169857071,
+            44166007021,
+            44160841078
+          ],
+          "samples_ts": [
+            11.5916,
+            11.5926,
+            11.594
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:29:46Z",
+          "avg_ns": 15676105994,
+          "stddev_ns": 1326118,
+          "avg_ts": 8.165293,
+          "stddev_ts": 0.000688,
+          "samples_ns": [
+            15677410860,
+            15676136137,
+            15674770986
+          ],
+          "samples_ts": [
+            8.16461,
+            8.16528,
+            8.16599
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 446
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:36:41.537505+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:30:34Z\",\n    \"avg_ns\": 44161054705,\n    \"stddev_ns\": 2043254,\n    \"avg_ts\": 11.593926,\n    \"stddev_ts\": 0.000531,\n    \"samples_ns\": [ 44161274150, 44162957556, 44158932411 ],\n    \"samples_ts\": [ 11.5939, 11.5934, 11.5945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:33:31Z\",\n    \"avg_ns\": 63301623875,\n    \"stddev_ns\": 1815355,\n    \"avg_ts\": 8.088260,\n    \"stddev_ts\": 0.000232,\n    \"samples_ns\": [ 63302427670, 63299545389, 63302898566 ],\n    \"samples_ts\": [ 8.08816, 8.08853, 8.0881 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:30:34Z",
+          "avg_ns": 44161054705,
+          "stddev_ns": 2043254,
+          "avg_ts": 11.593926,
+          "stddev_ts": 0.000531,
+          "samples_ns": [
+            44161274150,
+            44162957556,
+            44158932411
+          ],
+          "samples_ts": [
+            11.5939,
+            11.5934,
+            11.5945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:33:31Z",
+          "avg_ns": 63301623875,
+          "stddev_ns": 1815355,
+          "avg_ts": 8.08826,
+          "stddev_ts": 0.000232,
+          "samples_ns": [
+            63302427670,
+            63299545389,
+            63302898566
+          ],
+          "samples_ts": [
+            8.08816,
+            8.08853,
+            8.0881
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 447
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:38:13.646315+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:36:42Z\",\n    \"avg_ns\": 10974235568,\n    \"stddev_ns\": 2413796,\n    \"avg_ts\": 11.663683,\n    \"stddev_ts\": 0.002565,\n    \"samples_ns\": [ 10977009891, 10973080272, 10972616541 ],\n    \"samples_ts\": [ 11.6607, 11.6649, 11.6654 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:37:26Z\",\n    \"avg_ns\": 15718845871,\n    \"stddev_ns\": 383875,\n    \"avg_ts\": 8.143091,\n    \"stddev_ts\": 0.000176,\n    \"samples_ns\": [ 15719149137, 15718477570, 15718910908 ],\n    \"samples_ts\": [ 8.14293, 8.14328, 8.14306 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:36:42Z",
+          "avg_ns": 10974235568,
+          "stddev_ns": 2413796,
+          "avg_ts": 11.663683,
+          "stddev_ts": 0.002565,
+          "samples_ns": [
+            10977009891,
+            10973080272,
+            10972616541
+          ],
+          "samples_ts": [
+            11.6607,
+            11.6649,
+            11.6654
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:37:26Z",
+          "avg_ns": 15718845871,
+          "stddev_ns": 383875,
+          "avg_ts": 8.143091,
+          "stddev_ts": 0.000176,
+          "samples_ns": [
+            15719149137,
+            15718477570,
+            15718910908
+          ],
+          "samples_ts": [
+            8.14293,
+            8.14328,
+            8.14306
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 448
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:42:09.239337+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:38:14Z\",\n    \"avg_ns\": 10978326198,\n    \"stddev_ns\": 735799,\n    \"avg_ts\": 11.659337,\n    \"stddev_ts\": 0.000774,\n    \"samples_ns\": [ 10977485386, 10978732517, 10978760692 ],\n    \"samples_ts\": [ 11.6602, 11.6589, 11.6589 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:38:58Z\",\n    \"avg_ns\": 63541656509,\n    \"stddev_ns\": 1860566,\n    \"avg_ts\": 8.057706,\n    \"stddev_ts\": 0.000232,\n    \"samples_ns\": [ 63542568044, 63542847392, 63539554093 ],\n    \"samples_ts\": [ 8.05759, 8.05756, 8.05797 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:38:14Z",
+          "avg_ns": 10978326198,
+          "stddev_ns": 735799,
+          "avg_ts": 11.659337,
+          "stddev_ts": 0.000774,
+          "samples_ns": [
+            10977485386,
+            10978732517,
+            10978760692
+          ],
+          "samples_ts": [
+            11.6602,
+            11.6589,
+            11.6589
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:38:58Z",
+          "avg_ns": 63541656509,
+          "stddev_ns": 1860566,
+          "avg_ts": 8.057706,
+          "stddev_ts": 0.000232,
+          "samples_ns": [
+            63542568044,
+            63542847392,
+            63539554093
+          ],
+          "samples_ts": [
+            8.05759,
+            8.05756,
+            8.05797
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 449
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:45:54.137055+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:42:10Z\",\n    \"avg_ns\": 44186558829,\n    \"stddev_ns\": 2420634,\n    \"avg_ts\": 11.587234,\n    \"stddev_ts\": 0.000632,\n    \"samples_ns\": [ 44188307664, 44183807854, 44187560970 ],\n    \"samples_ts\": [ 11.5868, 11.588, 11.587 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:45:06Z\",\n    \"avg_ns\": 15698512609,\n    \"stddev_ns\": 1309984,\n    \"avg_ts\": 8.153639,\n    \"stddev_ts\": 0.000680,\n    \"samples_ns\": [ 15698008350, 15699999790, 15697529687 ],\n    \"samples_ts\": [ 8.1539, 8.15287, 8.15415 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:42:10Z",
+          "avg_ns": 44186558829,
+          "stddev_ns": 2420634,
+          "avg_ts": 11.587234,
+          "stddev_ts": 0.000632,
+          "samples_ns": [
+            44188307664,
+            44183807854,
+            44187560970
+          ],
+          "samples_ts": [
+            11.5868,
+            11.588,
+            11.587
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:45:06Z",
+          "avg_ns": 15698512609,
+          "stddev_ns": 1309984,
+          "avg_ts": 8.153639,
+          "stddev_ts": 0.00068,
+          "samples_ns": [
+            15698008350,
+            15699999790,
+            15697529687
+          ],
+          "samples_ts": [
+            8.1539,
+            8.15287,
+            8.15415
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 450
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:52:01.964938+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:45:54Z\",\n    \"avg_ns\": 44168338621,\n    \"stddev_ns\": 1302838,\n    \"avg_ts\": 11.592014,\n    \"stddev_ts\": 0.000342,\n    \"samples_ns\": [ 44169773054, 44167228735, 44168014074 ],\n    \"samples_ts\": [ 11.5916, 11.5923, 11.5921 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:48:51Z\",\n    \"avg_ns\": 63363276963,\n    \"stddev_ns\": 1722740,\n    \"avg_ts\": 8.080390,\n    \"stddev_ts\": 0.000220,\n    \"samples_ns\": [ 63365014498, 63363246983, 63361569408 ],\n    \"samples_ts\": [ 8.08017, 8.08039, 8.08061 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:45:54Z",
+          "avg_ns": 44168338621,
+          "stddev_ns": 1302838,
+          "avg_ts": 11.592014,
+          "stddev_ts": 0.000342,
+          "samples_ns": [
+            44169773054,
+            44167228735,
+            44168014074
+          ],
+          "samples_ts": [
+            11.5916,
+            11.5923,
+            11.5921
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:48:51Z",
+          "avg_ns": 63363276963,
+          "stddev_ns": 1722740,
+          "avg_ts": 8.08039,
+          "stddev_ts": 0.00022,
+          "samples_ns": [
+            63365014498,
+            63363246983,
+            63361569408
+          ],
+          "samples_ts": [
+            8.08017,
+            8.08039,
+            8.08061
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 451
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:53:34.078626+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:52:02Z\",\n    \"avg_ns\": 10970436941,\n    \"stddev_ns\": 641953,\n    \"avg_ts\": 11.667721,\n    \"stddev_ts\": 0.000664,\n    \"samples_ns\": [ 10970632074, 10970940712, 10969738039 ],\n    \"samples_ts\": [ 11.6675, 11.6672, 11.6685 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:52:46Z\",\n    \"avg_ns\": 15727028715,\n    \"stddev_ns\": 3695887,\n    \"avg_ts\": 8.138855,\n    \"stddev_ts\": 0.001910,\n    \"samples_ns\": [ 15727003641, 15730732819, 15723349687 ],\n    \"samples_ts\": [ 8.13887, 8.13694, 8.14076 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:52:02Z",
+          "avg_ns": 10970436941,
+          "stddev_ns": 641953,
+          "avg_ts": 11.667721,
+          "stddev_ts": 0.000664,
+          "samples_ns": [
+            10970632074,
+            10970940712,
+            10969738039
+          ],
+          "samples_ts": [
+            11.6675,
+            11.6672,
+            11.6685
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:52:46Z",
+          "avg_ns": 15727028715,
+          "stddev_ns": 3695887,
+          "avg_ts": 8.138855,
+          "stddev_ts": 0.00191,
+          "samples_ns": [
+            15727003641,
+            15730732819,
+            15723349687
+          ],
+          "samples_ts": [
+            8.13887,
+            8.13694,
+            8.14076
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 452
+    },
+    {
+      "timestamp_utc": "2025-12-09T01:57:29.621592+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:53:34Z\",\n    \"avg_ns\": 10977246463,\n    \"stddev_ns\": 1289433,\n    \"avg_ts\": 11.660483,\n    \"stddev_ts\": 0.001370,\n    \"samples_ns\": [ 10978498344, 10977318544, 10975922501 ],\n    \"samples_ts\": [ 11.6592, 11.6604, 11.6619 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:54:18Z\",\n    \"avg_ns\": 63525754857,\n    \"stddev_ns\": 496945,\n    \"avg_ts\": 8.059723,\n    \"stddev_ts\": 0.000054,\n    \"samples_ns\": [ 63525302297, 63526153824, 63525808451 ],\n    \"samples_ts\": [ 8.05978, 8.05967, 8.05972 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:53:34Z",
+          "avg_ns": 10977246463,
+          "stddev_ns": 1289433,
+          "avg_ts": 11.660483,
+          "stddev_ts": 0.00137,
+          "samples_ns": [
+            10978498344,
+            10977318544,
+            10975922501
+          ],
+          "samples_ts": [
+            11.6592,
+            11.6604,
+            11.6619
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:54:18Z",
+          "avg_ns": 63525754857,
+          "stddev_ns": 496945,
+          "avg_ts": 8.059723,
+          "stddev_ts": 5.4e-05,
+          "samples_ns": [
+            63525302297,
+            63526153824,
+            63525808451
+          ],
+          "samples_ts": [
+            8.05978,
+            8.05967,
+            8.05972
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 453
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:01:16.146035+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T01:57:30Z\",\n    \"avg_ns\": 44526698171,\n    \"stddev_ns\": 1585785,\n    \"avg_ts\": 11.498719,\n    \"stddev_ts\": 0.000406,\n    \"samples_ns\": [ 44526425964, 44528388179, 44525280371 ],\n    \"samples_ts\": [ 11.4988, 11.4983, 11.4991 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:00:28Z\",\n    \"avg_ns\": 15767425865,\n    \"stddev_ns\": 3553359,\n    \"avg_ts\": 8.118003,\n    \"stddev_ts\": 0.001829,\n    \"samples_ns\": [ 15764715973, 15766112717, 15771448905 ],\n    \"samples_ts\": [ 8.1194, 8.11868, 8.11593 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T01:57:30Z",
+          "avg_ns": 44526698171,
+          "stddev_ns": 1585785,
+          "avg_ts": 11.498719,
+          "stddev_ts": 0.000406,
+          "samples_ns": [
+            44526425964,
+            44528388179,
+            44525280371
+          ],
+          "samples_ts": [
+            11.4988,
+            11.4983,
+            11.4991
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:00:28Z",
+          "avg_ns": 15767425865,
+          "stddev_ns": 3553359,
+          "avg_ts": 8.118003,
+          "stddev_ts": 0.001829,
+          "samples_ns": [
+            15764715973,
+            15766112717,
+            15771448905
+          ],
+          "samples_ts": [
+            8.1194,
+            8.11868,
+            8.11593
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 454
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:07:26.496787+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:01:17Z\",\n    \"avg_ns\": 44540252571,\n    \"stddev_ns\": 715265,\n    \"avg_ts\": 11.495220,\n    \"stddev_ts\": 0.000176,\n    \"samples_ns\": [ 44540811745, 44539490741, 44540455228 ],\n    \"samples_ts\": [ 11.4951, 11.4954, 11.4952 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:04:15Z\",\n    \"avg_ns\": 63691607744,\n    \"stddev_ns\": 16596049,\n    \"avg_ts\": 8.038736,\n    \"stddev_ts\": 0.002094,\n    \"samples_ns\": [ 63686006871, 63678540789, 63710275574 ],\n    \"samples_ts\": [ 8.03944, 8.04039, 8.03638 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:01:17Z",
+          "avg_ns": 44540252571,
+          "stddev_ns": 715265,
+          "avg_ts": 11.49522,
+          "stddev_ts": 0.000176,
+          "samples_ns": [
+            44540811745,
+            44539490741,
+            44540455228
+          ],
+          "samples_ts": [
+            11.4951,
+            11.4954,
+            11.4952
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:04:15Z",
+          "avg_ns": 63691607744,
+          "stddev_ns": 16596049,
+          "avg_ts": 8.038736,
+          "stddev_ts": 0.002094,
+          "samples_ns": [
+            63686006871,
+            63678540789,
+            63710275574
+          ],
+          "samples_ts": [
+            8.03944,
+            8.04039,
+            8.03638
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 455
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:08:58.529578+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:07:27Z\",\n    \"avg_ns\": 10976500971,\n    \"stddev_ns\": 730455,\n    \"avg_ts\": 11.661275,\n    \"stddev_ts\": 0.000776,\n    \"samples_ns\": [ 10976589477, 10977183141, 10975730295 ],\n    \"samples_ts\": [ 11.6612, 11.6606, 11.6621 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:08:11Z\",\n    \"avg_ns\": 15688250544,\n    \"stddev_ns\": 1221037,\n    \"avg_ts\": 8.158972,\n    \"stddev_ts\": 0.000632,\n    \"samples_ns\": [ 15689078081, 15688817404, 15686856148 ],\n    \"samples_ts\": [ 8.15854, 8.15868, 8.1597 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:07:27Z",
+          "avg_ns": 10976500971,
+          "stddev_ns": 730455,
+          "avg_ts": 11.661275,
+          "stddev_ts": 0.000776,
+          "samples_ns": [
+            10976589477,
+            10977183141,
+            10975730295
+          ],
+          "samples_ts": [
+            11.6612,
+            11.6606,
+            11.6621
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:08:11Z",
+          "avg_ns": 15688250544,
+          "stddev_ns": 1221037,
+          "avg_ts": 8.158972,
+          "stddev_ts": 0.000632,
+          "samples_ns": [
+            15689078081,
+            15688817404,
+            15686856148
+          ],
+          "samples_ts": [
+            8.15854,
+            8.15868,
+            8.1597
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 456
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:12:53.627042+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:08:59Z\",\n    \"avg_ns\": 10983741346,\n    \"stddev_ns\": 995257,\n    \"avg_ts\": 11.653588,\n    \"stddev_ts\": 0.001056,\n    \"samples_ns\": [ 10983650384, 10984778962, 10982794692 ],\n    \"samples_ts\": [ 11.6537, 11.6525, 11.6546 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:09:43Z\",\n    \"avg_ns\": 63368580588,\n    \"stddev_ns\": 1050229,\n    \"avg_ts\": 8.079714,\n    \"stddev_ts\": 0.000130,\n    \"samples_ns\": [ 63367543161, 63368617200, 63369581404 ],\n    \"samples_ts\": [ 8.07985, 8.07971, 8.07959 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:08:59Z",
+          "avg_ns": 10983741346,
+          "stddev_ns": 995257,
+          "avg_ts": 11.653588,
+          "stddev_ts": 0.001056,
+          "samples_ns": [
+            10983650384,
+            10984778962,
+            10982794692
+          ],
+          "samples_ts": [
+            11.6537,
+            11.6525,
+            11.6546
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:09:43Z",
+          "avg_ns": 63368580588,
+          "stddev_ns": 1050229,
+          "avg_ts": 8.079714,
+          "stddev_ts": 0.00013,
+          "samples_ns": [
+            63367543161,
+            63368617200,
+            63369581404
+          ],
+          "samples_ts": [
+            8.07985,
+            8.07971,
+            8.07959
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 457
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:16:38.384516+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:12:54Z\",\n    \"avg_ns\": 44164525338,\n    \"stddev_ns\": 1681180,\n    \"avg_ts\": 11.593015,\n    \"stddev_ts\": 0.000441,\n    \"samples_ns\": [ 44165851025, 44162634372, 44165090617 ],\n    \"samples_ts\": [ 11.5927, 11.5935, 11.5929 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:15:51Z\",\n    \"avg_ns\": 15681183575,\n    \"stddev_ns\": 2198619,\n    \"avg_ts\": 8.162649,\n    \"stddev_ts\": 0.001141,\n    \"samples_ns\": [ 15683462683, 15680996291, 15679091753 ],\n    \"samples_ts\": [ 8.16146, 8.16275, 8.16374 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:12:54Z",
+          "avg_ns": 44164525338,
+          "stddev_ns": 1681180,
+          "avg_ts": 11.593015,
+          "stddev_ts": 0.000441,
+          "samples_ns": [
+            44165851025,
+            44162634372,
+            44165090617
+          ],
+          "samples_ts": [
+            11.5927,
+            11.5935,
+            11.5929
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:15:51Z",
+          "avg_ns": 15681183575,
+          "stddev_ns": 2198619,
+          "avg_ts": 8.162649,
+          "stddev_ts": 0.001141,
+          "samples_ns": [
+            15683462683,
+            15680996291,
+            15679091753
+          ],
+          "samples_ts": [
+            8.16146,
+            8.16275,
+            8.16374
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 458
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:22:46.058379+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:16:39Z\",\n    \"avg_ns\": 44157876857,\n    \"stddev_ns\": 635881,\n    \"avg_ts\": 11.594760,\n    \"stddev_ts\": 0.000167,\n    \"samples_ns\": [ 44157217823, 44158486733, 44157926015 ],\n    \"samples_ts\": [ 11.5949, 11.5946, 11.5947 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:19:35Z\",\n    \"avg_ns\": 63326367645,\n    \"stddev_ns\": 2885835,\n    \"avg_ts\": 8.085100,\n    \"stddev_ts\": 0.000367,\n    \"samples_ns\": [ 63323048503, 63328074017, 63327980416 ],\n    \"samples_ts\": [ 8.08552, 8.08488, 8.08489 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:16:39Z",
+          "avg_ns": 44157876857,
+          "stddev_ns": 635881,
+          "avg_ts": 11.59476,
+          "stddev_ts": 0.000167,
+          "samples_ns": [
+            44157217823,
+            44158486733,
+            44157926015
+          ],
+          "samples_ts": [
+            11.5949,
+            11.5946,
+            11.5947
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:19:35Z",
+          "avg_ns": 63326367645,
+          "stddev_ns": 2885835,
+          "avg_ts": 8.0851,
+          "stddev_ts": 0.000367,
+          "samples_ns": [
+            63323048503,
+            63328074017,
+            63327980416
+          ],
+          "samples_ts": [
+            8.08552,
+            8.08488,
+            8.08489
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 459
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:24:18.092813+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:22:46Z\",\n    \"avg_ns\": 10980594625,\n    \"stddev_ns\": 697070,\n    \"avg_ts\": 11.656928,\n    \"stddev_ts\": 0.000723,\n    \"samples_ns\": [ 10980381973, 10981356719, 10980045185 ],\n    \"samples_ts\": [ 11.6572, 11.6561, 11.6575 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:23:30Z\",\n    \"avg_ns\": 15677851227,\n    \"stddev_ns\": 625707,\n    \"avg_ts\": 8.164384,\n    \"stddev_ts\": 0.000326,\n    \"samples_ns\": [ 15678571063, 15677545036, 15677437582 ],\n    \"samples_ts\": [ 8.16401, 8.16454, 8.1646 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:22:46Z",
+          "avg_ns": 10980594625,
+          "stddev_ns": 697070,
+          "avg_ts": 11.656928,
+          "stddev_ts": 0.000723,
+          "samples_ns": [
+            10980381973,
+            10981356719,
+            10980045185
+          ],
+          "samples_ts": [
+            11.6572,
+            11.6561,
+            11.6575
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:23:30Z",
+          "avg_ns": 15677851227,
+          "stddev_ns": 625707,
+          "avg_ts": 8.164384,
+          "stddev_ts": 0.000326,
+          "samples_ns": [
+            15678571063,
+            15677545036,
+            15677437582
+          ],
+          "samples_ts": [
+            8.16401,
+            8.16454,
+            8.1646
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 460
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:28:13.031218+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:24:18Z\",\n    \"avg_ns\": 10976604209,\n    \"stddev_ns\": 1112389,\n    \"avg_ts\": 11.661166,\n    \"stddev_ts\": 0.001171,\n    \"samples_ns\": [ 10975744143, 10977847060, 10976221426 ],\n    \"samples_ts\": [ 11.6621, 11.6598, 11.6616 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:25:02Z\",\n    \"avg_ns\": 63324478218,\n    \"stddev_ns\": 1082122,\n    \"avg_ts\": 8.085341,\n    \"stddev_ts\": 0.000138,\n    \"samples_ns\": [ 63324848100, 63325326902, 63323259652 ],\n    \"samples_ts\": [ 8.08529, 8.08523, 8.0855 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:24:18Z",
+          "avg_ns": 10976604209,
+          "stddev_ns": 1112389,
+          "avg_ts": 11.661166,
+          "stddev_ts": 0.001171,
+          "samples_ns": [
+            10975744143,
+            10977847060,
+            10976221426
+          ],
+          "samples_ts": [
+            11.6621,
+            11.6598,
+            11.6616
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:25:02Z",
+          "avg_ns": 63324478218,
+          "stddev_ns": 1082122,
+          "avg_ts": 8.085341,
+          "stddev_ts": 0.000138,
+          "samples_ns": [
+            63324848100,
+            63325326902,
+            63323259652
+          ],
+          "samples_ts": [
+            8.08529,
+            8.08523,
+            8.0855
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 461
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:31:57.955751+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:28:13Z\",\n    \"avg_ns\": 44175735318,\n    \"stddev_ns\": 3067510,\n    \"avg_ts\": 11.590073,\n    \"stddev_ts\": 0.000805,\n    \"samples_ns\": [ 44178958542, 44175395611, 44172851801 ],\n    \"samples_ts\": [ 11.5892, 11.5902, 11.5908 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:31:10Z\",\n    \"avg_ns\": 15689176530,\n    \"stddev_ns\": 3102817,\n    \"avg_ts\": 8.158491,\n    \"stddev_ts\": 0.001612,\n    \"samples_ns\": [ 15686557076, 15692599461, 15688373054 ],\n    \"samples_ts\": [ 8.15985, 8.15671, 8.15891 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:28:13Z",
+          "avg_ns": 44175735318,
+          "stddev_ns": 3067510,
+          "avg_ts": 11.590073,
+          "stddev_ts": 0.000805,
+          "samples_ns": [
+            44178958542,
+            44175395611,
+            44172851801
+          ],
+          "samples_ts": [
+            11.5892,
+            11.5902,
+            11.5908
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:31:10Z",
+          "avg_ns": 15689176530,
+          "stddev_ns": 3102817,
+          "avg_ts": 8.158491,
+          "stddev_ts": 0.001612,
+          "samples_ns": [
+            15686557076,
+            15692599461,
+            15688373054
+          ],
+          "samples_ts": [
+            8.15985,
+            8.15671,
+            8.15891
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 462
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:38:06.165686+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:31:58Z\",\n    \"avg_ns\": 44197833817,\n    \"stddev_ns\": 1676768,\n    \"avg_ts\": 11.584278,\n    \"stddev_ts\": 0.000439,\n    \"samples_ns\": [ 44199191473, 44198350452, 44195959526 ],\n    \"samples_ts\": [ 11.5839, 11.5841, 11.5848 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:34:55Z\",\n    \"avg_ns\": 63452033611,\n    \"stddev_ns\": 6360740,\n    \"avg_ts\": 8.069087,\n    \"stddev_ts\": 0.000808,\n    \"samples_ns\": [ 63459366340, 63448441000, 63448293495 ],\n    \"samples_ts\": [ 8.06815, 8.06954, 8.06956 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:31:58Z",
+          "avg_ns": 44197833817,
+          "stddev_ns": 1676768,
+          "avg_ts": 11.584278,
+          "stddev_ts": 0.000439,
+          "samples_ns": [
+            44199191473,
+            44198350452,
+            44195959526
+          ],
+          "samples_ts": [
+            11.5839,
+            11.5841,
+            11.5848
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:34:55Z",
+          "avg_ns": 63452033611,
+          "stddev_ns": 6360740,
+          "avg_ts": 8.069087,
+          "stddev_ts": 0.000808,
+          "samples_ns": [
+            63459366340,
+            63448441000,
+            63448293495
+          ],
+          "samples_ts": [
+            8.06815,
+            8.06954,
+            8.06956
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 463
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:39:38.285290+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:38:07Z\",\n    \"avg_ns\": 10979814219,\n    \"stddev_ns\": 535662,\n    \"avg_ts\": 11.657756,\n    \"stddev_ts\": 0.000547,\n    \"samples_ns\": [ 10980406262, 10979563909, 10979472488 ],\n    \"samples_ts\": [ 11.6571, 11.658, 11.6581 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:38:51Z\",\n    \"avg_ns\": 15676503063,\n    \"stddev_ns\": 1215501,\n    \"avg_ts\": 8.165086,\n    \"stddev_ts\": 0.000626,\n    \"samples_ns\": [ 15676364958, 15677768689, 15675375544 ],\n    \"samples_ts\": [ 8.16516, 8.16443, 8.16567 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:38:07Z",
+          "avg_ns": 10979814219,
+          "stddev_ns": 535662,
+          "avg_ts": 11.657756,
+          "stddev_ts": 0.000547,
+          "samples_ns": [
+            10980406262,
+            10979563909,
+            10979472488
+          ],
+          "samples_ts": [
+            11.6571,
+            11.658,
+            11.6581
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:38:51Z",
+          "avg_ns": 15676503063,
+          "stddev_ns": 1215501,
+          "avg_ts": 8.165086,
+          "stddev_ts": 0.000626,
+          "samples_ns": [
+            15676364958,
+            15677768689,
+            15675375544
+          ],
+          "samples_ts": [
+            8.16516,
+            8.16443,
+            8.16567
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 464
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:43:34.189464+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:39:39Z\",\n    \"avg_ns\": 10974300323,\n    \"stddev_ns\": 1327485,\n    \"avg_ts\": 11.663614,\n    \"stddev_ts\": 0.001411,\n    \"samples_ns\": [ 10972801343, 10974772304, 10975327322 ],\n    \"samples_ts\": [ 11.6652, 11.6631, 11.6625 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:40:23Z\",\n    \"avg_ns\": 63646900098,\n    \"stddev_ns\": 2267497,\n    \"avg_ts\": 8.044382,\n    \"stddev_ts\": 0.000287,\n    \"samples_ns\": [ 63648243490, 63644282119, 63648174685 ],\n    \"samples_ts\": [ 8.04421, 8.04471, 8.04422 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:39:39Z",
+          "avg_ns": 10974300323,
+          "stddev_ns": 1327485,
+          "avg_ts": 11.663614,
+          "stddev_ts": 0.001411,
+          "samples_ns": [
+            10972801343,
+            10974772304,
+            10975327322
+          ],
+          "samples_ts": [
+            11.6652,
+            11.6631,
+            11.6625
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:40:23Z",
+          "avg_ns": 63646900098,
+          "stddev_ns": 2267497,
+          "avg_ts": 8.044382,
+          "stddev_ts": 0.000287,
+          "samples_ns": [
+            63648243490,
+            63644282119,
+            63648174685
+          ],
+          "samples_ts": [
+            8.04421,
+            8.04471,
+            8.04422
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 465
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:47:25.015848+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:43:35Z\",\n    \"avg_ns\": 45544316214,\n    \"stddev_ns\": 2220396,\n    \"avg_ts\": 11.241798,\n    \"stddev_ts\": 0.000548,\n    \"samples_ns\": [ 45544111103, 45546632049, 45542205490 ],\n    \"samples_ts\": [ 11.2418, 11.2412, 11.2423 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:46:37Z\",\n    \"avg_ns\": 15851901873,\n    \"stddev_ns\": 228136909,\n    \"avg_ts\": 8.075847,\n    \"stddev_ts\": 0.115268,\n    \"samples_ns\": [ 16115330494, 15719501563, 15720873562 ],\n    \"samples_ts\": [ 7.94275, 8.14275, 8.14204 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:43:35Z",
+          "avg_ns": 45544316214,
+          "stddev_ns": 2220396,
+          "avg_ts": 11.241798,
+          "stddev_ts": 0.000548,
+          "samples_ns": [
+            45544111103,
+            45546632049,
+            45542205490
+          ],
+          "samples_ts": [
+            11.2418,
+            11.2412,
+            11.2423
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:46:37Z",
+          "avg_ns": 15851901873,
+          "stddev_ns": 228136909,
+          "avg_ts": 8.075847,
+          "stddev_ts": 0.115268,
+          "samples_ns": [
+            16115330494,
+            15719501563,
+            15720873562
+          ],
+          "samples_ts": [
+            7.94275,
+            8.14275,
+            8.14204
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 466
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:53:34.810338+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:47:25Z\",\n    \"avg_ns\": 44549294304,\n    \"stddev_ns\": 612582,\n    \"avg_ts\": 11.492887,\n    \"stddev_ts\": 0.000138,\n    \"samples_ns\": [ 44548736911, 44549803529, 44549342474 ],\n    \"samples_ts\": [ 11.493, 11.4928, 11.4929 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:50:24Z\",\n    \"avg_ns\": 63504790957,\n    \"stddev_ns\": 12175827,\n    \"avg_ts\": 8.062384,\n    \"stddev_ts\": 0.001545,\n    \"samples_ns\": [ 63518725324, 63499404546, 63496243003 ],\n    \"samples_ts\": [ 8.06062, 8.06307, 8.06347 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:47:25Z",
+          "avg_ns": 44549294304,
+          "stddev_ns": 612582,
+          "avg_ts": 11.492887,
+          "stddev_ts": 0.000138,
+          "samples_ns": [
+            44548736911,
+            44549803529,
+            44549342474
+          ],
+          "samples_ts": [
+            11.493,
+            11.4928,
+            11.4929
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:50:24Z",
+          "avg_ns": 63504790957,
+          "stddev_ns": 12175827,
+          "avg_ts": 8.062384,
+          "stddev_ts": 0.001545,
+          "samples_ns": [
+            63518725324,
+            63499404546,
+            63496243003
+          ],
+          "samples_ts": [
+            8.06062,
+            8.06307,
+            8.06347
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 467
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:54:29.170758+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:53:35Z\",\n    \"avg_ns\": 5520131286,\n    \"stddev_ns\": 1778695,\n    \"avg_ts\": 23.187856,\n    \"stddev_ts\": 0.007471,\n    \"samples_ns\": [ 5522067445, 5519756690, 5518569723 ],\n    \"samples_ts\": [ 23.1797, 23.1894, 23.1944 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:53:57Z\",\n    \"avg_ns\": 10421636577,\n    \"stddev_ns\": 1791163,\n    \"avg_ts\": 12.282140,\n    \"stddev_ts\": 0.002107,\n    \"samples_ns\": [ 10423700035, 10420671567, 10420538130 ],\n    \"samples_ts\": [ 12.2797, 12.2833, 12.2834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:53:35Z",
+          "avg_ns": 5520131286,
+          "stddev_ns": 1778695,
+          "avg_ts": 23.187856,
+          "stddev_ts": 0.007471,
+          "samples_ns": [
+            5522067445,
+            5519756690,
+            5518569723
+          ],
+          "samples_ts": [
+            23.1797,
+            23.1894,
+            23.1944
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:53:57Z",
+          "avg_ns": 10421636577,
+          "stddev_ns": 1791163,
+          "avg_ts": 12.28214,
+          "stddev_ts": 0.002107,
+          "samples_ns": [
+            10423700035,
+            10420671567,
+            10420538130
+          ],
+          "samples_ts": [
+            12.2797,
+            12.2833,
+            12.2834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 468
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:56:57.623923+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:54:30Z\",\n    \"avg_ns\": 5519976204,\n    \"stddev_ns\": 1218462,\n    \"avg_ts\": 23.188507,\n    \"stddev_ts\": 0.005109,\n    \"samples_ns\": [ 5518992583, 5519599972, 5521336058 ],\n    \"samples_ts\": [ 23.1926, 23.1901, 23.1828 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:54:52Z\",\n    \"avg_ns\": 41779465276,\n    \"stddev_ns\": 3653125,\n    \"avg_ts\": 12.254824,\n    \"stddev_ts\": 0.001071,\n    \"samples_ns\": [ 41783635159, 41777932043, 41776828626 ],\n    \"samples_ts\": [ 12.2536, 12.2553, 12.2556 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:54:30Z",
+          "avg_ns": 5519976204,
+          "stddev_ns": 1218462,
+          "avg_ts": 23.188507,
+          "stddev_ts": 0.005109,
+          "samples_ns": [
+            5518992583,
+            5519599972,
+            5521336058
+          ],
+          "samples_ts": [
+            23.1926,
+            23.1901,
+            23.1828
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:54:52Z",
+          "avg_ns": 41779465276,
+          "stddev_ns": 3653125,
+          "avg_ts": 12.254824,
+          "stddev_ts": 0.001071,
+          "samples_ns": [
+            41783635159,
+            41777932043,
+            41776828626
+          ],
+          "samples_ts": [
+            12.2536,
+            12.2553,
+            12.2556
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 469
+    },
+    {
+      "timestamp_utc": "2025-12-09T02:58:59.484377+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:56:58Z\",\n    \"avg_ns\": 22364201636,\n    \"stddev_ns\": 151702901,\n    \"avg_ts\": 22.894435,\n    \"stddev_ts\": 0.155910,\n    \"samples_ns\": [ 22452422481, 22451150589, 22189031839 ],\n    \"samples_ts\": [ 22.8038, 22.8051, 23.0745 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:58:28Z\",\n    \"avg_ns\": 10424638889,\n    \"stddev_ns\": 2710395,\n    \"avg_ts\": 12.278603,\n    \"stddev_ts\": 0.003188,\n    \"samples_ns\": [ 10421982594, 10427393033, 10424541042 ],\n    \"samples_ts\": [ 12.2817, 12.2754, 12.2787 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:56:58Z",
+          "avg_ns": 22364201636,
+          "stddev_ns": 151702901,
+          "avg_ts": 22.894435,
+          "stddev_ts": 0.15591,
+          "samples_ns": [
+            22452422481,
+            22451150589,
+            22189031839
+          ],
+          "samples_ts": [
+            22.8038,
+            22.8051,
+            23.0745
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:58:28Z",
+          "avg_ns": 10424638889,
+          "stddev_ns": 2710395,
+          "avg_ts": 12.278603,
+          "stddev_ts": 0.003188,
+          "samples_ns": [
+            10421982594,
+            10427393033,
+            10424541042
+          ],
+          "samples_ts": [
+            12.2817,
+            12.2754,
+            12.2787
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 470
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:02:34.447478+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T02:59:00Z\",\n    \"avg_ns\": 22174778786,\n    \"stddev_ns\": 1052799,\n    \"avg_ts\": 23.089295,\n    \"stddev_ts\": 0.001085,\n    \"samples_ns\": [ 22173577030, 22175324447, 22175434882 ],\n    \"samples_ts\": [ 23.0905, 23.0887, 23.0886 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:00:29Z\",\n    \"avg_ns\": 41748720987,\n    \"stddev_ns\": 4381408,\n    \"avg_ts\": 12.263849,\n    \"stddev_ts\": 0.001287,\n    \"samples_ns\": [ 41750582193, 41743716237, 41751864531 ],\n    \"samples_ts\": [ 12.2633, 12.2653, 12.2629 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T02:59:00Z",
+          "avg_ns": 22174778786,
+          "stddev_ns": 1052799,
+          "avg_ts": 23.089295,
+          "stddev_ts": 0.001085,
+          "samples_ns": [
+            22173577030,
+            22175324447,
+            22175434882
+          ],
+          "samples_ts": [
+            23.0905,
+            23.0887,
+            23.0886
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:00:29Z",
+          "avg_ns": 41748720987,
+          "stddev_ns": 4381408,
+          "avg_ts": 12.263849,
+          "stddev_ts": 0.001287,
+          "samples_ns": [
+            41750582193,
+            41743716237,
+            41751864531
+          ],
+          "samples_ts": [
+            12.2633,
+            12.2653,
+            12.2629
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 471
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:03:28.589447+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:02:35Z\",\n    \"avg_ns\": 5520821691,\n    \"stddev_ns\": 308122,\n    \"avg_ts\": 23.184955,\n    \"stddev_ts\": 0.001294,\n    \"samples_ns\": [ 5520490976, 5520873426, 5521100671 ],\n    \"samples_ts\": [ 23.1863, 23.1847, 23.1838 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:02:57Z\",\n    \"avg_ns\": 10347088549,\n    \"stddev_ns\": 2604161,\n    \"avg_ts\": 12.370630,\n    \"stddev_ts\": 0.003113,\n    \"samples_ns\": [ 10346883923, 10349788987, 10344592737 ],\n    \"samples_ts\": [ 12.3709, 12.3674, 12.3736 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:02:35Z",
+          "avg_ns": 5520821691,
+          "stddev_ns": 308122,
+          "avg_ts": 23.184955,
+          "stddev_ts": 0.001294,
+          "samples_ns": [
+            5520490976,
+            5520873426,
+            5521100671
+          ],
+          "samples_ts": [
+            23.1863,
+            23.1847,
+            23.1838
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:02:57Z",
+          "avg_ns": 10347088549,
+          "stddev_ns": 2604161,
+          "avg_ts": 12.37063,
+          "stddev_ts": 0.003113,
+          "samples_ns": [
+            10346883923,
+            10349788987,
+            10344592737
+          ],
+          "samples_ts": [
+            12.3709,
+            12.3674,
+            12.3736
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 472
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:05:57.047274+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:03:29Z\",\n    \"avg_ns\": 5523392285,\n    \"stddev_ns\": 349905,\n    \"avg_ts\": 23.174164,\n    \"stddev_ts\": 0.001400,\n    \"samples_ns\": [ 5523313386, 5523758413, 5523105058 ],\n    \"samples_ts\": [ 23.1745, 23.1726, 23.1754 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:03:51Z\",\n    \"avg_ns\": 41780695162,\n    \"stddev_ns\": 2286212,\n    \"avg_ts\": 12.254463,\n    \"stddev_ts\": 0.000665,\n    \"samples_ns\": [ 41780334272, 41778629383, 41783121833 ],\n    \"samples_ts\": [ 12.2546, 12.2551, 12.2538 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:03:29Z",
+          "avg_ns": 5523392285,
+          "stddev_ns": 349905,
+          "avg_ts": 23.174164,
+          "stddev_ts": 0.0014,
+          "samples_ns": [
+            5523313386,
+            5523758413,
+            5523105058
+          ],
+          "samples_ts": [
+            23.1745,
+            23.1726,
+            23.1754
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:03:51Z",
+          "avg_ns": 41780695162,
+          "stddev_ns": 2286212,
+          "avg_ts": 12.254463,
+          "stddev_ts": 0.000665,
+          "samples_ns": [
+            41780334272,
+            41778629383,
+            41783121833
+          ],
+          "samples_ts": [
+            12.2546,
+            12.2551,
+            12.2538
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 473
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:07:57.923076+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:05:57Z\",\n    \"avg_ns\": 22195151946,\n    \"stddev_ns\": 480151,\n    \"avg_ts\": 23.068101,\n    \"stddev_ts\": 0.000499,\n    \"samples_ns\": [ 22195594769, 22194641612, 22195219457 ],\n    \"samples_ts\": [ 23.0676, 23.0686, 23.068 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:07:26Z\",\n    \"avg_ns\": 10350483109,\n    \"stddev_ns\": 865842,\n    \"avg_ts\": 12.366573,\n    \"stddev_ts\": 0.001035,\n    \"samples_ns\": [ 10349485336, 10351036947, 10350927044 ],\n    \"samples_ts\": [ 12.3678, 12.3659, 12.366 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:05:57Z",
+          "avg_ns": 22195151946,
+          "stddev_ns": 480151,
+          "avg_ts": 23.068101,
+          "stddev_ts": 0.000499,
+          "samples_ns": [
+            22195594769,
+            22194641612,
+            22195219457
+          ],
+          "samples_ts": [
+            23.0676,
+            23.0686,
+            23.068
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:07:26Z",
+          "avg_ns": 10350483109,
+          "stddev_ns": 865842,
+          "avg_ts": 12.366573,
+          "stddev_ts": 0.001035,
+          "samples_ns": [
+            10349485336,
+            10351036947,
+            10350927044
+          ],
+          "samples_ts": [
+            12.3678,
+            12.3659,
+            12.366
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 474
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:11:33.070842+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:07:58Z\",\n    \"avg_ns\": 22201611514,\n    \"stddev_ns\": 519614,\n    \"avg_ts\": 23.061389,\n    \"stddev_ts\": 0.000517,\n    \"samples_ns\": [ 22202034612, 22201736921, 22201063010 ],\n    \"samples_ts\": [ 23.0609, 23.0613, 23.062 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:09:27Z\",\n    \"avg_ns\": 41772007109,\n    \"stddev_ns\": 640441,\n    \"avg_ts\": 12.257012,\n    \"stddev_ts\": 0.000178,\n    \"samples_ns\": [ 41772699056, 41771757578, 41771564694 ],\n    \"samples_ts\": [ 12.2568, 12.2571, 12.2571 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:07:58Z",
+          "avg_ns": 22201611514,
+          "stddev_ns": 519614,
+          "avg_ts": 23.061389,
+          "stddev_ts": 0.000517,
+          "samples_ns": [
+            22202034612,
+            22201736921,
+            22201063010
+          ],
+          "samples_ts": [
+            23.0609,
+            23.0613,
+            23.062
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:09:27Z",
+          "avg_ns": 41772007109,
+          "stddev_ns": 640441,
+          "avg_ts": 12.257012,
+          "stddev_ts": 0.000178,
+          "samples_ns": [
+            41772699056,
+            41771757578,
+            41771564694
+          ],
+          "samples_ts": [
+            12.2568,
+            12.2571,
+            12.2571
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 475
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:12:27.265566+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:11:33Z\",\n    \"avg_ns\": 5521534342,\n    \"stddev_ns\": 977568,\n    \"avg_ts\": 23.181963,\n    \"stddev_ts\": 0.004080,\n    \"samples_ns\": [ 5521025060, 5522655050, 5520922918 ],\n    \"samples_ts\": [ 23.1841, 23.1773, 23.1845 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:11:56Z\",\n    \"avg_ns\": 10349197683,\n    \"stddev_ns\": 4729938,\n    \"avg_ts\": 12.368110,\n    \"stddev_ts\": 0.005650,\n    \"samples_ns\": [ 10354155323, 10348698253, 10344739475 ],\n    \"samples_ts\": [ 12.3622, 12.3687, 12.3734 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:11:33Z",
+          "avg_ns": 5521534342,
+          "stddev_ns": 977568,
+          "avg_ts": 23.181963,
+          "stddev_ts": 0.00408,
+          "samples_ns": [
+            5521025060,
+            5522655050,
+            5520922918
+          ],
+          "samples_ts": [
+            23.1841,
+            23.1773,
+            23.1845
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:11:56Z",
+          "avg_ns": 10349197683,
+          "stddev_ns": 4729938,
+          "avg_ts": 12.36811,
+          "stddev_ts": 0.00565,
+          "samples_ns": [
+            10354155323,
+            10348698253,
+            10344739475
+          ],
+          "samples_ts": [
+            12.3622,
+            12.3687,
+            12.3734
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 476
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:14:56.938150+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:12:28Z\",\n    \"avg_ns\": 5518989705,\n    \"stddev_ns\": 92315,\n    \"avg_ts\": 23.192651,\n    \"stddev_ts\": 0.000388,\n    \"samples_ns\": [ 5519039914, 5518883167, 5519046034 ],\n    \"samples_ts\": [ 23.1924, 23.1931, 23.1924 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:12:50Z\",\n    \"avg_ns\": 42189465564,\n    \"stddev_ns\": 84884547,\n    \"avg_ts\": 12.135764,\n    \"stddev_ts\": 0.024389,\n    \"samples_ns\": [ 42287473434, 42141531799, 42139391461 ],\n    \"samples_ts\": [ 12.1076, 12.1495, 12.1502 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:12:28Z",
+          "avg_ns": 5518989705,
+          "stddev_ns": 92315,
+          "avg_ts": 23.192651,
+          "stddev_ts": 0.000388,
+          "samples_ns": [
+            5519039914,
+            5518883167,
+            5519046034
+          ],
+          "samples_ts": [
+            23.1924,
+            23.1931,
+            23.1924
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:12:50Z",
+          "avg_ns": 42189465564,
+          "stddev_ns": 84884547,
+          "avg_ts": 12.135764,
+          "stddev_ts": 0.024389,
+          "samples_ns": [
+            42287473434,
+            42141531799,
+            42139391461
+          ],
+          "samples_ts": [
+            12.1076,
+            12.1495,
+            12.1502
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 477
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:16:58.825440+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:14:57Z\",\n    \"avg_ns\": 22433895618,\n    \"stddev_ns\": 338324,\n    \"avg_ts\": 22.822608,\n    \"stddev_ts\": 0.000309,\n    \"samples_ns\": [ 22433903127, 22434195158, 22433588570 ],\n    \"samples_ts\": [ 22.8226, 22.8223, 22.8229 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:16:27Z\",\n    \"avg_ns\": 10375395120,\n    \"stddev_ns\": 3756267,\n    \"avg_ts\": 12.336881,\n    \"stddev_ts\": 0.004465,\n    \"samples_ns\": [ 10371233080, 10376423889, 10378528392 ],\n    \"samples_ts\": [ 12.3418, 12.3357, 12.3332 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:14:57Z",
+          "avg_ns": 22433895618,
+          "stddev_ns": 338324,
+          "avg_ts": 22.822608,
+          "stddev_ts": 0.000309,
+          "samples_ns": [
+            22433903127,
+            22434195158,
+            22433588570
+          ],
+          "samples_ts": [
+            22.8226,
+            22.8223,
+            22.8229
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:16:27Z",
+          "avg_ns": 10375395120,
+          "stddev_ns": 3756267,
+          "avg_ts": 12.336881,
+          "stddev_ts": 0.004465,
+          "samples_ns": [
+            10371233080,
+            10376423889,
+            10378528392
+          ],
+          "samples_ts": [
+            12.3418,
+            12.3357,
+            12.3332
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 478
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:20:34.942870+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:16:59Z\",\n    \"avg_ns\": 22424362383,\n    \"stddev_ns\": 179502,\n    \"avg_ts\": 22.832310,\n    \"stddev_ts\": 0.000101,\n    \"samples_ns\": [ 22424446784, 22424386923, 22424253443 ],\n    \"samples_ts\": [ 22.8322, 22.8323, 22.8324 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:18:29Z\",\n    \"avg_ns\": 41780967167,\n    \"stddev_ns\": 6714366,\n    \"avg_ts\": 12.254384,\n    \"stddev_ts\": 0.001969,\n    \"samples_ns\": [ 41788500256, 41778788747, 41775612498 ],\n    \"samples_ts\": [ 12.2522, 12.255, 12.256 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:16:59Z",
+          "avg_ns": 22424362383,
+          "stddev_ns": 179502,
+          "avg_ts": 22.83231,
+          "stddev_ts": 0.000101,
+          "samples_ns": [
+            22424446784,
+            22424386923,
+            22424253443
+          ],
+          "samples_ts": [
+            22.8322,
+            22.8323,
+            22.8324
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:18:29Z",
+          "avg_ns": 41780967167,
+          "stddev_ns": 6714366,
+          "avg_ts": 12.254384,
+          "stddev_ts": 0.001969,
+          "samples_ns": [
+            41788500256,
+            41778788747,
+            41775612498
+          ],
+          "samples_ts": [
+            12.2522,
+            12.255,
+            12.256
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 479
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:21:29.136931+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:20:35Z\",\n    \"avg_ns\": 5520236930,\n    \"stddev_ns\": 377472,\n    \"avg_ts\": 23.187411,\n    \"stddev_ts\": 0.001586,\n    \"samples_ns\": [ 5520547309, 5520346758, 5519816723 ],\n    \"samples_ts\": [ 23.1861, 23.1869, 23.1892 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:20:57Z\",\n    \"avg_ns\": 10364712474,\n    \"stddev_ns\": 2647996,\n    \"avg_ts\": 12.349595,\n    \"stddev_ts\": 0.003153,\n    \"samples_ns\": [ 10366307014, 10366172327, 10361658082 ],\n    \"samples_ts\": [ 12.3477, 12.3479, 12.3532 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:20:35Z",
+          "avg_ns": 5520236930,
+          "stddev_ns": 377472,
+          "avg_ts": 23.187411,
+          "stddev_ts": 0.001586,
+          "samples_ns": [
+            5520547309,
+            5520346758,
+            5519816723
+          ],
+          "samples_ts": [
+            23.1861,
+            23.1869,
+            23.1892
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:20:57Z",
+          "avg_ns": 10364712474,
+          "stddev_ns": 2647996,
+          "avg_ts": 12.349595,
+          "stddev_ts": 0.003153,
+          "samples_ns": [
+            10366307014,
+            10366172327,
+            10361658082
+          ],
+          "samples_ts": [
+            12.3477,
+            12.3479,
+            12.3532
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 480
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:23:57.683030+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:21:29Z\",\n    \"avg_ns\": 5522375691,\n    \"stddev_ns\": 306611,\n    \"avg_ts\": 23.178430,\n    \"stddev_ts\": 0.001249,\n    \"samples_ns\": [ 5522709328, 5522138140, 5522279606 ],\n    \"samples_ts\": [ 23.177, 23.1794, 23.1788 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:21:52Z\",\n    \"avg_ns\": 41810875168,\n    \"stddev_ns\": 1769789,\n    \"avg_ts\": 12.245618,\n    \"stddev_ts\": 0.000518,\n    \"samples_ns\": [ 41808831624, 41811886902, 41811906978 ],\n    \"samples_ts\": [ 12.2462, 12.2453, 12.2453 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:21:29Z",
+          "avg_ns": 5522375691,
+          "stddev_ns": 306611,
+          "avg_ts": 23.17843,
+          "stddev_ts": 0.001249,
+          "samples_ns": [
+            5522709328,
+            5522138140,
+            5522279606
+          ],
+          "samples_ts": [
+            23.177,
+            23.1794,
+            23.1788
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:21:52Z",
+          "avg_ns": 41810875168,
+          "stddev_ns": 1769789,
+          "avg_ts": 12.245618,
+          "stddev_ts": 0.000518,
+          "samples_ns": [
+            41808831624,
+            41811886902,
+            41811906978
+          ],
+          "samples_ts": [
+            12.2462,
+            12.2453,
+            12.2453
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 481
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:25:58.498465+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:23:58Z\",\n    \"avg_ns\": 22177329409,\n    \"stddev_ns\": 1385148,\n    \"avg_ts\": 23.086639,\n    \"stddev_ts\": 0.001425,\n    \"samples_ns\": [ 22175889053, 22178613778, 22177485398 ],\n    \"samples_ts\": [ 23.0881, 23.0853, 23.0865 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:25:27Z\",\n    \"avg_ns\": 10362945834,\n    \"stddev_ns\": 17669008,\n    \"avg_ts\": 12.351724,\n    \"stddev_ts\": 0.021040,\n    \"samples_ns\": [ 10383252927, 10351086314, 10354498261 ],\n    \"samples_ts\": [ 12.3275, 12.3659, 12.3618 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:23:58Z",
+          "avg_ns": 22177329409,
+          "stddev_ns": 1385148,
+          "avg_ts": 23.086639,
+          "stddev_ts": 0.001425,
+          "samples_ns": [
+            22175889053,
+            22178613778,
+            22177485398
+          ],
+          "samples_ts": [
+            23.0881,
+            23.0853,
+            23.0865
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:25:27Z",
+          "avg_ns": 10362945834,
+          "stddev_ns": 17669008,
+          "avg_ts": 12.351724,
+          "stddev_ts": 0.02104,
+          "samples_ns": [
+            10383252927,
+            10351086314,
+            10354498261
+          ],
+          "samples_ts": [
+            12.3275,
+            12.3659,
+            12.3618
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 482
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:29:33.454058+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:25:59Z\",\n    \"avg_ns\": 22172177856,\n    \"stddev_ns\": 1024332,\n    \"avg_ts\": 23.092003,\n    \"stddev_ts\": 0.001044,\n    \"samples_ns\": [ 22172406186, 22173046449, 22171080935 ],\n    \"samples_ts\": [ 23.0918, 23.0911, 23.0931 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:27:28Z\",\n    \"avg_ns\": 41748489754,\n    \"stddev_ns\": 3249020,\n    \"avg_ts\": 12.263917,\n    \"stddev_ts\": 0.000953,\n    \"samples_ns\": [ 41751793810, 41745312341, 41748363112 ],\n    \"samples_ts\": [ 12.2629, 12.2649, 12.264 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:25:59Z",
+          "avg_ns": 22172177856,
+          "stddev_ns": 1024332,
+          "avg_ts": 23.092003,
+          "stddev_ts": 0.001044,
+          "samples_ns": [
+            22172406186,
+            22173046449,
+            22171080935
+          ],
+          "samples_ts": [
+            23.0918,
+            23.0911,
+            23.0931
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:27:28Z",
+          "avg_ns": 41748489754,
+          "stddev_ns": 3249020,
+          "avg_ts": 12.263917,
+          "stddev_ts": 0.000953,
+          "samples_ns": [
+            41751793810,
+            41745312341,
+            41748363112
+          ],
+          "samples_ts": [
+            12.2629,
+            12.2649,
+            12.264
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 483
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:30:27.645170+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:29:34Z\",\n    \"avg_ns\": 5520717156,\n    \"stddev_ns\": 385549,\n    \"avg_ts\": 23.185394,\n    \"stddev_ts\": 0.001619,\n    \"samples_ns\": [ 5520331008, 5521102104, 5520718356 ],\n    \"samples_ts\": [ 23.187, 23.1838, 23.1854 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:29:56Z\",\n    \"avg_ns\": 10362822876,\n    \"stddev_ns\": 1077302,\n    \"avg_ts\": 12.351847,\n    \"stddev_ts\": 0.001284,\n    \"samples_ns\": [ 10363940853, 10362736302, 10361791473 ],\n    \"samples_ts\": [ 12.3505, 12.3519, 12.3531 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:29:34Z",
+          "avg_ns": 5520717156,
+          "stddev_ns": 385549,
+          "avg_ts": 23.185394,
+          "stddev_ts": 0.001619,
+          "samples_ns": [
+            5520331008,
+            5521102104,
+            5520718356
+          ],
+          "samples_ts": [
+            23.187,
+            23.1838,
+            23.1854
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:29:56Z",
+          "avg_ns": 10362822876,
+          "stddev_ns": 1077302,
+          "avg_ts": 12.351847,
+          "stddev_ts": 0.001284,
+          "samples_ns": [
+            10363940853,
+            10362736302,
+            10361791473
+          ],
+          "samples_ts": [
+            12.3505,
+            12.3519,
+            12.3531
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 484
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:32:56.204825+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:30:28Z\",\n    \"avg_ns\": 5522956946,\n    \"stddev_ns\": 507716,\n    \"avg_ts\": 23.175991,\n    \"stddev_ts\": 0.002084,\n    \"samples_ns\": [ 5522625689, 5522717074, 5523528077 ],\n    \"samples_ts\": [ 23.1774, 23.177, 23.1736 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:30:50Z\",\n    \"avg_ns\": 41807603157,\n    \"stddev_ns\": 2730184,\n    \"avg_ts\": 12.246576,\n    \"stddev_ts\": 0.000798,\n    \"samples_ns\": [ 41810078144, 41808044334, 41804686994 ],\n    \"samples_ts\": [ 12.2459, 12.2464, 12.2474 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:30:28Z",
+          "avg_ns": 5522956946,
+          "stddev_ns": 507716,
+          "avg_ts": 23.175991,
+          "stddev_ts": 0.002084,
+          "samples_ns": [
+            5522625689,
+            5522717074,
+            5523528077
+          ],
+          "samples_ts": [
+            23.1774,
+            23.177,
+            23.1736
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:30:50Z",
+          "avg_ns": 41807603157,
+          "stddev_ns": 2730184,
+          "avg_ts": 12.246576,
+          "stddev_ts": 0.000798,
+          "samples_ns": [
+            41810078144,
+            41808044334,
+            41804686994
+          ],
+          "samples_ts": [
+            12.2459,
+            12.2464,
+            12.2474
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 485
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:34:57.098705+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:32:57Z\",\n    \"avg_ns\": 22207945617,\n    \"stddev_ns\": 552473,\n    \"avg_ts\": 23.054812,\n    \"stddev_ts\": 0.000530,\n    \"samples_ns\": [ 22207401880, 22208019818, 22208415155 ],\n    \"samples_ts\": [ 23.0554, 23.0547, 23.0543 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:34:25Z\",\n    \"avg_ns\": 10348542539,\n    \"stddev_ns\": 595288,\n    \"avg_ts\": 12.368892,\n    \"stddev_ts\": 0.000712,\n    \"samples_ns\": [ 10347947689, 10349138266, 10348541662 ],\n    \"samples_ts\": [ 12.3696, 12.3682, 12.3689 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:32:57Z",
+          "avg_ns": 22207945617,
+          "stddev_ns": 552473,
+          "avg_ts": 23.054812,
+          "stddev_ts": 0.00053,
+          "samples_ns": [
+            22207401880,
+            22208019818,
+            22208415155
+          ],
+          "samples_ts": [
+            23.0554,
+            23.0547,
+            23.0543
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:34:25Z",
+          "avg_ns": 10348542539,
+          "stddev_ns": 595288,
+          "avg_ts": 12.368892,
+          "stddev_ts": 0.000712,
+          "samples_ns": [
+            10347947689,
+            10349138266,
+            10348541662
+          ],
+          "samples_ts": [
+            12.3696,
+            12.3682,
+            12.3689
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 486
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:38:32.248428+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:34:57Z\",\n    \"avg_ns\": 22206301340,\n    \"stddev_ns\": 507828,\n    \"avg_ts\": 23.056519,\n    \"stddev_ts\": 0.000504,\n    \"samples_ns\": [ 22206698902, 22206444819, 22205760300 ],\n    \"samples_ts\": [ 23.0561, 23.0564, 23.0571 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:36:26Z\",\n    \"avg_ns\": 41758070666,\n    \"stddev_ns\": 2825246,\n    \"avg_ts\": 12.261103,\n    \"stddev_ts\": 0.000827,\n    \"samples_ns\": [ 41761156485, 41757421374, 41755634140 ],\n    \"samples_ts\": [ 12.2602, 12.2613, 12.2618 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:34:57Z",
+          "avg_ns": 22206301340,
+          "stddev_ns": 507828,
+          "avg_ts": 23.056519,
+          "stddev_ts": 0.000504,
+          "samples_ns": [
+            22206698902,
+            22206444819,
+            22205760300
+          ],
+          "samples_ts": [
+            23.0561,
+            23.0564,
+            23.0571
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:36:26Z",
+          "avg_ns": 41758070666,
+          "stddev_ns": 2825246,
+          "avg_ts": 12.261103,
+          "stddev_ts": 0.000827,
+          "samples_ns": [
+            41761156485,
+            41757421374,
+            41755634140
+          ],
+          "samples_ts": [
+            12.2602,
+            12.2613,
+            12.2618
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 487
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:39:26.509170+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:38:33Z\",\n    \"avg_ns\": 5524526455,\n    \"stddev_ns\": 527436,\n    \"avg_ts\": 23.169407,\n    \"stddev_ts\": 0.002168,\n    \"samples_ns\": [ 5523937853, 5524906199, 5524735315 ],\n    \"samples_ts\": [ 23.1719, 23.1678, 23.1685 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:38:55Z\",\n    \"avg_ns\": 10366684664,\n    \"stddev_ns\": 2582807,\n    \"avg_ts\": 12.347246,\n    \"stddev_ts\": 0.003076,\n    \"samples_ns\": [ 10369433775, 10366311451, 10364308766 ],\n    \"samples_ts\": [ 12.344, 12.3477, 12.3501 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:38:33Z",
+          "avg_ns": 5524526455,
+          "stddev_ns": 527436,
+          "avg_ts": 23.169407,
+          "stddev_ts": 0.002168,
+          "samples_ns": [
+            5523937853,
+            5524906199,
+            5524735315
+          ],
+          "samples_ts": [
+            23.1719,
+            23.1678,
+            23.1685
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:38:55Z",
+          "avg_ns": 10366684664,
+          "stddev_ns": 2582807,
+          "avg_ts": 12.347246,
+          "stddev_ts": 0.003076,
+          "samples_ns": [
+            10369433775,
+            10366311451,
+            10364308766
+          ],
+          "samples_ts": [
+            12.344,
+            12.3477,
+            12.3501
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 488
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:41:55.306843+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:39:27Z\",\n    \"avg_ns\": 5523735565,\n    \"stddev_ns\": 958664,\n    \"avg_ts\": 23.172725,\n    \"stddev_ts\": 0.003997,\n    \"samples_ns\": [ 5524738428, 5522842101, 5523626168 ],\n    \"samples_ts\": [ 23.1685, 23.1765, 23.1732 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:39:49Z\",\n    \"avg_ns\": 41883236917,\n    \"stddev_ns\": 8749411,\n    \"avg_ts\": 12.224461,\n    \"stddev_ts\": 0.002553,\n    \"samples_ns\": [ 41892737402, 41875517516, 41881455834 ],\n    \"samples_ts\": [ 12.2217, 12.2267, 12.225 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:39:27Z",
+          "avg_ns": 5523735565,
+          "stddev_ns": 958664,
+          "avg_ts": 23.172725,
+          "stddev_ts": 0.003997,
+          "samples_ns": [
+            5524738428,
+            5522842101,
+            5523626168
+          ],
+          "samples_ts": [
+            23.1685,
+            23.1765,
+            23.1732
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:39:49Z",
+          "avg_ns": 41883236917,
+          "stddev_ns": 8749411,
+          "avg_ts": 12.224461,
+          "stddev_ts": 0.002553,
+          "samples_ns": [
+            41892737402,
+            41875517516,
+            41881455834
+          ],
+          "samples_ts": [
+            12.2217,
+            12.2267,
+            12.225
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 489
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:43:57.092430+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:41:56Z\",\n    \"avg_ns\": 22423680401,\n    \"stddev_ns\": 574431,\n    \"avg_ts\": 22.833005,\n    \"stddev_ts\": 0.000544,\n    \"samples_ns\": [ 22423470327, 22423283417, 22424287461 ],\n    \"samples_ts\": [ 22.8332, 22.8334, 22.8324 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:43:25Z\",\n    \"avg_ns\": 10348129248,\n    \"stddev_ns\": 3739893,\n    \"avg_ts\": 12.369387,\n    \"stddev_ts\": 0.004467,\n    \"samples_ns\": [ 10351659764, 10348512875, 10344215107 ],\n    \"samples_ts\": [ 12.3652, 12.3689, 12.3741 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:41:56Z",
+          "avg_ns": 22423680401,
+          "stddev_ns": 574431,
+          "avg_ts": 22.833005,
+          "stddev_ts": 0.000544,
+          "samples_ns": [
+            22423470327,
+            22423283417,
+            22424287461
+          ],
+          "samples_ts": [
+            22.8332,
+            22.8334,
+            22.8324
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:43:25Z",
+          "avg_ns": 10348129248,
+          "stddev_ns": 3739893,
+          "avg_ts": 12.369387,
+          "stddev_ts": 0.004467,
+          "samples_ns": [
+            10351659764,
+            10348512875,
+            10344215107
+          ],
+          "samples_ts": [
+            12.3652,
+            12.3689,
+            12.3741
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 490
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:47:33.087963+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:43:57Z\",\n    \"avg_ns\": 22417443181,\n    \"stddev_ns\": 344762,\n    \"avg_ts\": 22.839358,\n    \"stddev_ts\": 0.000316,\n    \"samples_ns\": [ 22417176803, 22417784278, 22417368463 ],\n    \"samples_ts\": [ 22.8396, 22.839, 22.8394 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:45:27Z\",\n    \"avg_ns\": 41766507746,\n    \"stddev_ns\": 3512809,\n    \"avg_ts\": 12.258626,\n    \"stddev_ts\": 0.001031,\n    \"samples_ns\": [ 41769939613, 41762919234, 41766664391 ],\n    \"samples_ts\": [ 12.2576, 12.2597, 12.2586 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:43:57Z",
+          "avg_ns": 22417443181,
+          "stddev_ns": 344762,
+          "avg_ts": 22.839358,
+          "stddev_ts": 0.000316,
+          "samples_ns": [
+            22417176803,
+            22417784278,
+            22417368463
+          ],
+          "samples_ts": [
+            22.8396,
+            22.839,
+            22.8394
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:45:27Z",
+          "avg_ns": 41766507746,
+          "stddev_ns": 3512809,
+          "avg_ts": 12.258626,
+          "stddev_ts": 0.001031,
+          "samples_ns": [
+            41769939613,
+            41762919234,
+            41766664391
+          ],
+          "samples_ts": [
+            12.2576,
+            12.2597,
+            12.2586
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 491
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:48:27.309793+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:47:33Z\",\n    \"avg_ns\": 5521472037,\n    \"stddev_ns\": 1702944,\n    \"avg_ts\": 23.182225,\n    \"stddev_ts\": 0.007149,\n    \"samples_ns\": [ 5520428425, 5523437164, 5520550522 ],\n    \"samples_ts\": [ 23.1866, 23.174, 23.1861 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:47:56Z\",\n    \"avg_ns\": 10364957196,\n    \"stddev_ns\": 2737331,\n    \"avg_ts\": 12.349304,\n    \"stddev_ts\": 0.003261,\n    \"samples_ns\": [ 10367990294, 10364210856, 10362670438 ],\n    \"samples_ts\": [ 12.3457, 12.3502, 12.352 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:47:33Z",
+          "avg_ns": 5521472037,
+          "stddev_ns": 1702944,
+          "avg_ts": 23.182225,
+          "stddev_ts": 0.007149,
+          "samples_ns": [
+            5520428425,
+            5523437164,
+            5520550522
+          ],
+          "samples_ts": [
+            23.1866,
+            23.174,
+            23.1861
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:47:56Z",
+          "avg_ns": 10364957196,
+          "stddev_ns": 2737331,
+          "avg_ts": 12.349304,
+          "stddev_ts": 0.003261,
+          "samples_ns": [
+            10367990294,
+            10364210856,
+            10362670438
+          ],
+          "samples_ts": [
+            12.3457,
+            12.3502,
+            12.352
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 492
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:50:56.839807+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:48:28Z\",\n    \"avg_ns\": 5519520954,\n    \"stddev_ns\": 386598,\n    \"avg_ts\": 23.190418,\n    \"stddev_ts\": 0.001594,\n    \"samples_ns\": [ 5519757148, 5519083329, 5519722386 ],\n    \"samples_ts\": [ 23.1894, 23.1923, 23.1896 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:48:50Z\",\n    \"avg_ns\": 42142587574,\n    \"stddev_ns\": 3430085,\n    \"avg_ts\": 12.149230,\n    \"stddev_ts\": 0.000989,\n    \"samples_ns\": [ 42146515065, 42140180439, 42141067218 ],\n    \"samples_ts\": [ 12.1481, 12.1499, 12.1497 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:48:28Z",
+          "avg_ns": 5519520954,
+          "stddev_ns": 386598,
+          "avg_ts": 23.190418,
+          "stddev_ts": 0.001594,
+          "samples_ns": [
+            5519757148,
+            5519083329,
+            5519722386
+          ],
+          "samples_ts": [
+            23.1894,
+            23.1923,
+            23.1896
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:48:50Z",
+          "avg_ns": 42142587574,
+          "stddev_ns": 3430085,
+          "avg_ts": 12.14923,
+          "stddev_ts": 0.000989,
+          "samples_ns": [
+            42146515065,
+            42140180439,
+            42141067218
+          ],
+          "samples_ts": [
+            12.1481,
+            12.1499,
+            12.1497
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 493
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:52:57.613436+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:50:57Z\",\n    \"avg_ns\": 22174941033,\n    \"stddev_ns\": 1645821,\n    \"avg_ts\": 23.089126,\n    \"stddev_ts\": 0.001707,\n    \"samples_ns\": [ 22176247377, 22175473886, 22173101837 ],\n    \"samples_ts\": [ 23.0878, 23.0886, 23.091 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:52:26Z\",\n    \"avg_ns\": 10350875403,\n    \"stddev_ns\": 863158,\n    \"avg_ts\": 12.366104,\n    \"stddev_ts\": 0.001024,\n    \"samples_ns\": [ 10351123485, 10351581142, 10349921583 ],\n    \"samples_ts\": [ 12.3658, 12.3653, 12.3672 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:50:57Z",
+          "avg_ns": 22174941033,
+          "stddev_ns": 1645821,
+          "avg_ts": 23.089126,
+          "stddev_ts": 0.001707,
+          "samples_ns": [
+            22176247377,
+            22175473886,
+            22173101837
+          ],
+          "samples_ts": [
+            23.0878,
+            23.0886,
+            23.091
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:52:26Z",
+          "avg_ns": 10350875403,
+          "stddev_ns": 863158,
+          "avg_ts": 12.366104,
+          "stddev_ts": 0.001024,
+          "samples_ns": [
+            10351123485,
+            10351581142,
+            10349921583
+          ],
+          "samples_ts": [
+            12.3658,
+            12.3653,
+            12.3672
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 494
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:56:32.614126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:52:58Z\",\n    \"avg_ns\": 22181626799,\n    \"stddev_ns\": 646116,\n    \"avg_ts\": 23.082166,\n    \"stddev_ts\": 0.000636,\n    \"samples_ns\": [ 22182006713, 22181951481, 22180922205 ],\n    \"samples_ts\": [ 23.0818, 23.0818, 23.0829 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:54:27Z\",\n    \"avg_ns\": 41750021767,\n    \"stddev_ns\": 3593827,\n    \"avg_ts\": 12.263467,\n    \"stddev_ts\": 0.001052,\n    \"samples_ns\": [ 41750383873, 41753409154, 41746272276 ],\n    \"samples_ts\": [ 12.2634, 12.2625, 12.2646 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:52:58Z",
+          "avg_ns": 22181626799,
+          "stddev_ns": 646116,
+          "avg_ts": 23.082166,
+          "stddev_ts": 0.000636,
+          "samples_ns": [
+            22182006713,
+            22181951481,
+            22180922205
+          ],
+          "samples_ts": [
+            23.0818,
+            23.0818,
+            23.0829
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:54:27Z",
+          "avg_ns": 41750021767,
+          "stddev_ns": 3593827,
+          "avg_ts": 12.263467,
+          "stddev_ts": 0.001052,
+          "samples_ns": [
+            41750383873,
+            41753409154,
+            41746272276
+          ],
+          "samples_ts": [
+            12.2634,
+            12.2625,
+            12.2646
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 495
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:57:26.880537+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:56:33Z\",\n    \"avg_ns\": 5522015874,\n    \"stddev_ns\": 361920,\n    \"avg_ts\": 23.179941,\n    \"stddev_ts\": 0.001519,\n    \"samples_ns\": [ 5521695198, 5522408293, 5521944131 ],\n    \"samples_ts\": [ 23.1813, 23.1783, 23.1802 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:56:55Z\",\n    \"avg_ns\": 10369326786,\n    \"stddev_ns\": 4813587,\n    \"avg_ts\": 12.344101,\n    \"stddev_ts\": 0.005730,\n    \"samples_ns\": [ 10374330357, 10368921148, 10364728853 ],\n    \"samples_ts\": [ 12.3381, 12.3446, 12.3496 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:56:33Z",
+          "avg_ns": 5522015874,
+          "stddev_ns": 361920,
+          "avg_ts": 23.179941,
+          "stddev_ts": 0.001519,
+          "samples_ns": [
+            5521695198,
+            5522408293,
+            5521944131
+          ],
+          "samples_ts": [
+            23.1813,
+            23.1783,
+            23.1802
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:56:55Z",
+          "avg_ns": 10369326786,
+          "stddev_ns": 4813587,
+          "avg_ts": 12.344101,
+          "stddev_ts": 0.00573,
+          "samples_ns": [
+            10374330357,
+            10368921148,
+            10364728853
+          ],
+          "samples_ts": [
+            12.3381,
+            12.3446,
+            12.3496
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 496
+    },
+    {
+      "timestamp_utc": "2025-12-09T03:59:55.418636+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:57:27Z\",\n    \"avg_ns\": 5520768585,\n    \"stddev_ns\": 1561272,\n    \"avg_ts\": 23.185179,\n    \"stddev_ts\": 0.006549,\n    \"samples_ns\": [ 5522520719, 5519532558, 5520252479 ],\n    \"samples_ts\": [ 23.1778, 23.1904, 23.1873 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:57:49Z\",\n    \"avg_ns\": 41794759443,\n    \"stddev_ns\": 2901591,\n    \"avg_ts\": 12.250340,\n    \"stddev_ts\": 0.000850,\n    \"samples_ns\": [ 41797311718, 41795363107, 41791603504 ],\n    \"samples_ts\": [ 12.2496, 12.2502, 12.2513 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:57:27Z",
+          "avg_ns": 5520768585,
+          "stddev_ns": 1561272,
+          "avg_ts": 23.185179,
+          "stddev_ts": 0.006549,
+          "samples_ns": [
+            5522520719,
+            5519532558,
+            5520252479
+          ],
+          "samples_ts": [
+            23.1778,
+            23.1904,
+            23.1873
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:57:49Z",
+          "avg_ns": 41794759443,
+          "stddev_ns": 2901591,
+          "avg_ts": 12.25034,
+          "stddev_ts": 0.00085,
+          "samples_ns": [
+            41797311718,
+            41795363107,
+            41791603504
+          ],
+          "samples_ts": [
+            12.2496,
+            12.2502,
+            12.2513
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 497
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:01:56.291937+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T03:59:56Z\",\n    \"avg_ns\": 22198062704,\n    \"stddev_ns\": 1416177,\n    \"avg_ts\": 23.065076,\n    \"stddev_ts\": 0.001455,\n    \"samples_ns\": [ 22197396490, 22199671853, 22197119771 ],\n    \"samples_ts\": [ 23.0658, 23.0634, 23.0661 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:01:25Z\",\n    \"avg_ns\": 10353711922,\n    \"stddev_ns\": 2650622,\n    \"avg_ts\": 12.362717,\n    \"stddev_ts\": 0.003160,\n    \"samples_ns\": [ 10353364846, 10356515051, 10351255871 ],\n    \"samples_ts\": [ 12.3631, 12.3594, 12.3656 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T03:59:56Z",
+          "avg_ns": 22198062704,
+          "stddev_ns": 1416177,
+          "avg_ts": 23.065076,
+          "stddev_ts": 0.001455,
+          "samples_ns": [
+            22197396490,
+            22199671853,
+            22197119771
+          ],
+          "samples_ts": [
+            23.0658,
+            23.0634,
+            23.0661
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:01:25Z",
+          "avg_ns": 10353711922,
+          "stddev_ns": 2650622,
+          "avg_ts": 12.362717,
+          "stddev_ts": 0.00316,
+          "samples_ns": [
+            10353364846,
+            10356515051,
+            10351255871
+          ],
+          "samples_ts": [
+            12.3631,
+            12.3594,
+            12.3656
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 498
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:05:31.313675+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:01:57Z\",\n    \"avg_ns\": 22199455885,\n    \"stddev_ns\": 1802413,\n    \"avg_ts\": 23.063628,\n    \"stddev_ts\": 0.001866,\n    \"samples_ns\": [ 22201416854, 22199060589, 22197890213 ],\n    \"samples_ts\": [ 23.0616, 23.064, 23.0653 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:03:25Z\",\n    \"avg_ns\": 41725694287,\n    \"stddev_ns\": 7038669,\n    \"avg_ts\": 12.270617,\n    \"stddev_ts\": 0.002070,\n    \"samples_ns\": [ 41733468459, 41719754261, 41723860141 ],\n    \"samples_ts\": [ 12.2683, 12.2724, 12.2712 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:01:57Z",
+          "avg_ns": 22199455885,
+          "stddev_ns": 1802413,
+          "avg_ts": 23.063628,
+          "stddev_ts": 0.001866,
+          "samples_ns": [
+            22201416854,
+            22199060589,
+            22197890213
+          ],
+          "samples_ts": [
+            23.0616,
+            23.064,
+            23.0653
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:03:25Z",
+          "avg_ns": 41725694287,
+          "stddev_ns": 7038669,
+          "avg_ts": 12.270617,
+          "stddev_ts": 0.00207,
+          "samples_ns": [
+            41733468459,
+            41719754261,
+            41723860141
+          ],
+          "samples_ts": [
+            12.2683,
+            12.2724,
+            12.2712
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 499
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:06:25.476884+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:05:32Z\",\n    \"avg_ns\": 5520340246,\n    \"stddev_ns\": 204803,\n    \"avg_ts\": 23.186977,\n    \"stddev_ts\": 0.000802,\n    \"samples_ns\": [ 5520275331, 5520555087, 5520190321 ],\n    \"samples_ts\": [ 23.1872, 23.1861, 23.1876 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:05:54Z\",\n    \"avg_ns\": 10354673038,\n    \"stddev_ns\": 2544305,\n    \"avg_ts\": 12.361569,\n    \"stddev_ts\": 0.003035,\n    \"samples_ns\": [ 10357307181, 10354478066, 10352233868 ],\n    \"samples_ts\": [ 12.3584, 12.3618, 12.3645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:05:32Z",
+          "avg_ns": 5520340246,
+          "stddev_ns": 204803,
+          "avg_ts": 23.186977,
+          "stddev_ts": 0.000802,
+          "samples_ns": [
+            5520275331,
+            5520555087,
+            5520190321
+          ],
+          "samples_ts": [
+            23.1872,
+            23.1861,
+            23.1876
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:05:54Z",
+          "avg_ns": 10354673038,
+          "stddev_ns": 2544305,
+          "avg_ts": 12.361569,
+          "stddev_ts": 0.003035,
+          "samples_ns": [
+            10357307181,
+            10354478066,
+            10352233868
+          ],
+          "samples_ts": [
+            12.3584,
+            12.3618,
+            12.3645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 500
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:08:54.328463+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:06:26Z\",\n    \"avg_ns\": 5521951852,\n    \"stddev_ns\": 152167,\n    \"avg_ts\": 23.180209,\n    \"stddev_ts\": 0.000462,\n    \"samples_ns\": [ 5522066699, 5521941532, 5521847327 ],\n    \"samples_ts\": [ 23.1797, 23.1803, 23.1806 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:06:48Z\",\n    \"avg_ns\": 41913618264,\n    \"stddev_ns\": 6793031,\n    \"avg_ts\": 12.215600,\n    \"stddev_ts\": 0.001979,\n    \"samples_ns\": [ 41919537160, 41915111722, 41906205911 ],\n    \"samples_ts\": [ 12.2139, 12.2152, 12.2178 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:06:26Z",
+          "avg_ns": 5521951852,
+          "stddev_ns": 152167,
+          "avg_ts": 23.180209,
+          "stddev_ts": 0.000462,
+          "samples_ns": [
+            5522066699,
+            5521941532,
+            5521847327
+          ],
+          "samples_ts": [
+            23.1797,
+            23.1803,
+            23.1806
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:06:48Z",
+          "avg_ns": 41913618264,
+          "stddev_ns": 6793031,
+          "avg_ts": 12.2156,
+          "stddev_ts": 0.001979,
+          "samples_ns": [
+            41919537160,
+            41915111722,
+            41906205911
+          ],
+          "samples_ts": [
+            12.2139,
+            12.2152,
+            12.2178
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 501
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:10:56.168690+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:08:55Z\",\n    \"avg_ns\": 22421281326,\n    \"stddev_ns\": 1764722,\n    \"avg_ts\": 22.835448,\n    \"stddev_ts\": 0.001784,\n    \"samples_ns\": [ 22422039051, 22422526901, 22419278028 ],\n    \"samples_ts\": [ 22.8347, 22.8342, 22.8375 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:10:24Z\",\n    \"avg_ns\": 10362896526,\n    \"stddev_ns\": 2506586,\n    \"avg_ts\": 12.351759,\n    \"stddev_ts\": 0.002988,\n    \"samples_ns\": [ 10365208416, 10360232486, 10363248676 ],\n    \"samples_ts\": [ 12.349, 12.3549, 12.3513 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:08:55Z",
+          "avg_ns": 22421281326,
+          "stddev_ns": 1764722,
+          "avg_ts": 22.835448,
+          "stddev_ts": 0.001784,
+          "samples_ns": [
+            22422039051,
+            22422526901,
+            22419278028
+          ],
+          "samples_ts": [
+            22.8347,
+            22.8342,
+            22.8375
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:10:24Z",
+          "avg_ns": 10362896526,
+          "stddev_ns": 2506586,
+          "avg_ts": 12.351759,
+          "stddev_ts": 0.002988,
+          "samples_ns": [
+            10365208416,
+            10360232486,
+            10363248676
+          ],
+          "samples_ts": [
+            12.349,
+            12.3549,
+            12.3513
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 502
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:14:32.408308+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:10:57Z\",\n    \"avg_ns\": 22417946647,\n    \"stddev_ns\": 2196739,\n    \"avg_ts\": 22.838845,\n    \"stddev_ts\": 0.002228,\n    \"samples_ns\": [ 22415421884, 22419211304, 22419206755 ],\n    \"samples_ts\": [ 22.8414, 22.8376, 22.8376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:12:26Z\",\n    \"avg_ns\": 41833858764,\n    \"stddev_ns\": 3478326,\n    \"avg_ts\": 12.238890,\n    \"stddev_ts\": 0.001018,\n    \"samples_ns\": [ 41831728780, 41831974836, 41837872676 ],\n    \"samples_ts\": [ 12.2395, 12.2394, 12.2377 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:10:57Z",
+          "avg_ns": 22417946647,
+          "stddev_ns": 2196739,
+          "avg_ts": 22.838845,
+          "stddev_ts": 0.002228,
+          "samples_ns": [
+            22415421884,
+            22419211304,
+            22419206755
+          ],
+          "samples_ts": [
+            22.8414,
+            22.8376,
+            22.8376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:12:26Z",
+          "avg_ns": 41833858764,
+          "stddev_ns": 3478326,
+          "avg_ts": 12.23889,
+          "stddev_ts": 0.001018,
+          "samples_ns": [
+            41831728780,
+            41831974836,
+            41837872676
+          ],
+          "samples_ts": [
+            12.2395,
+            12.2394,
+            12.2377
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 503
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:15:19.663670+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:14:33Z\",\n    \"avg_ns\": 3765308803,\n    \"stddev_ns\": 123899,\n    \"avg_ts\": 33.994556,\n    \"stddev_ts\": 0.000972,\n    \"samples_ns\": [ 3765328668, 3765405125, 3765192617 ],\n    \"samples_ts\": [ 33.9944, 33.9937, 33.9956 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:14:48Z\",\n    \"avg_ns\": 10368972276,\n    \"stddev_ns\": 2796448,\n    \"avg_ts\": 12.344522,\n    \"stddev_ts\": 0.003327,\n    \"samples_ns\": [ 10365789535, 10370102806, 10371024488 ],\n    \"samples_ts\": [ 12.3483, 12.3432, 12.3421 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:14:33Z",
+          "avg_ns": 3765308803,
+          "stddev_ns": 123899,
+          "avg_ts": 33.994556,
+          "stddev_ts": 0.000972,
+          "samples_ns": [
+            3765328668,
+            3765405125,
+            3765192617
+          ],
+          "samples_ts": [
+            33.9944,
+            33.9937,
+            33.9956
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:14:48Z",
+          "avg_ns": 10368972276,
+          "stddev_ns": 2796448,
+          "avg_ts": 12.344522,
+          "stddev_ts": 0.003327,
+          "samples_ns": [
+            10365789535,
+            10370102806,
+            10371024488
+          ],
+          "samples_ts": [
+            12.3483,
+            12.3432,
+            12.3421
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 504
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:17:41.322443+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:15:20Z\",\n    \"avg_ns\": 3760265796,\n    \"stddev_ns\": 488042,\n    \"avg_ts\": 34.040147,\n    \"stddev_ts\": 0.004383,\n    \"samples_ns\": [ 3760507465, 3759708359, 3760581565 ],\n    \"samples_ts\": [ 34.038, 34.0452, 34.0373 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:15:35Z\",\n    \"avg_ns\": 41845519409,\n    \"stddev_ns\": 303654607,\n    \"avg_ts\": 12.235907,\n    \"stddev_ts\": 0.088421,\n    \"samples_ns\": [ 41663026298, 41677481884, 42196050047 ],\n    \"samples_ts\": [ 12.2891, 12.2848, 12.1338 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:15:20Z",
+          "avg_ns": 3760265796,
+          "stddev_ns": 488042,
+          "avg_ts": 34.040147,
+          "stddev_ts": 0.004383,
+          "samples_ns": [
+            3760507465,
+            3759708359,
+            3760581565
+          ],
+          "samples_ts": [
+            34.038,
+            34.0452,
+            34.0373
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:15:35Z",
+          "avg_ns": 41845519409,
+          "stddev_ns": 303654607,
+          "avg_ts": 12.235907,
+          "stddev_ts": 0.088421,
+          "samples_ns": [
+            41663026298,
+            41677481884,
+            42196050047
+          ],
+          "samples_ts": [
+            12.2891,
+            12.2848,
+            12.1338
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 505
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:19:14.805755+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:17:42Z\",\n    \"avg_ns\": 15085239449,\n    \"stddev_ns\": 1721727,\n    \"avg_ts\": 33.940463,\n    \"stddev_ts\": 0.003874,\n    \"samples_ns\": [ 15086184826, 15083252151, 15086281370 ],\n    \"samples_ts\": [ 33.9383, 33.9449, 33.9381 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:18:42Z\",\n    \"avg_ns\": 10693377173,\n    \"stddev_ns\": 103628470,\n    \"avg_ts\": 11.970779,\n    \"stddev_ts\": 0.116659,\n    \"samples_ns\": [ 10573743643, 10755364902, 10751022975 ],\n    \"samples_ts\": [ 12.1055, 11.901, 11.9058 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:17:42Z",
+          "avg_ns": 15085239449,
+          "stddev_ns": 1721727,
+          "avg_ts": 33.940463,
+          "stddev_ts": 0.003874,
+          "samples_ns": [
+            15086184826,
+            15083252151,
+            15086281370
+          ],
+          "samples_ts": [
+            33.9383,
+            33.9449,
+            33.9381
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:18:42Z",
+          "avg_ns": 10693377173,
+          "stddev_ns": 103628470,
+          "avg_ts": 11.970779,
+          "stddev_ts": 0.116659,
+          "samples_ns": [
+            10573743643,
+            10755364902,
+            10751022975
+          ],
+          "samples_ts": [
+            12.1055,
+            11.901,
+            11.9058
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 506
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:22:26.081121+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:19:15Z\",\n    \"avg_ns\": 15084119117,\n    \"stddev_ns\": 739079,\n    \"avg_ts\": 33.942983,\n    \"stddev_ts\": 0.001663,\n    \"samples_ns\": [ 15083797667, 15084964489, 15083595195 ],\n    \"samples_ts\": [ 33.9437, 33.9411, 33.9442 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:20:16Z\",\n    \"avg_ns\": 43288326892,\n    \"stddev_ns\": 68201999,\n    \"avg_ts\": 11.827689,\n    \"stddev_ts\": 0.018652,\n    \"samples_ns\": [ 43209619249, 43329982790, 43325378638 ],\n    \"samples_ts\": [ 11.8492, 11.8163, 11.8176 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:19:15Z",
+          "avg_ns": 15084119117,
+          "stddev_ns": 739079,
+          "avg_ts": 33.942983,
+          "stddev_ts": 0.001663,
+          "samples_ns": [
+            15083797667,
+            15084964489,
+            15083595195
+          ],
+          "samples_ts": [
+            33.9437,
+            33.9411,
+            33.9442
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:20:16Z",
+          "avg_ns": 43288326892,
+          "stddev_ns": 68201999,
+          "avg_ts": 11.827689,
+          "stddev_ts": 0.018652,
+          "samples_ns": [
+            43209619249,
+            43329982790,
+            43325378638
+          ],
+          "samples_ts": [
+            11.8492,
+            11.8163,
+            11.8176
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 507
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:23:14.251942+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:22:27Z\",\n    \"avg_ns\": 3759049315,\n    \"stddev_ns\": 725736,\n    \"avg_ts\": 34.051164,\n    \"stddev_ts\": 0.006574,\n    \"samples_ns\": [ 3759851540, 3758858011, 3758438394 ],\n    \"samples_ts\": [ 34.0439, 34.0529, 34.0567 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:22:42Z\",\n    \"avg_ns\": 10665889805,\n    \"stddev_ns\": 141450076,\n    \"avg_ts\": 12.002291,\n    \"stddev_ts\": 0.160270,\n    \"samples_ns\": [ 10504562606, 10768650354, 10724456455 ],\n    \"samples_ts\": [ 12.1852, 11.8864, 11.9353 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:22:27Z",
+          "avg_ns": 3759049315,
+          "stddev_ns": 725736,
+          "avg_ts": 34.051164,
+          "stddev_ts": 0.006574,
+          "samples_ns": [
+            3759851540,
+            3758858011,
+            3758438394
+          ],
+          "samples_ts": [
+            34.0439,
+            34.0529,
+            34.0567
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:22:42Z",
+          "avg_ns": 10665889805,
+          "stddev_ns": 141450076,
+          "avg_ts": 12.002291,
+          "stddev_ts": 0.16027,
+          "samples_ns": [
+            10504562606,
+            10768650354,
+            10724456455
+          ],
+          "samples_ts": [
+            12.1852,
+            11.8864,
+            11.9353
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 508
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:25:40.076120+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:23:15Z\",\n    \"avg_ns\": 3759167020,\n    \"stddev_ns\": 1471057,\n    \"avg_ts\": 34.050100,\n    \"stddev_ts\": 0.013299,\n    \"samples_ns\": [ 3758127175, 3758526972, 3760846915 ],\n    \"samples_ts\": [ 34.0595, 34.0559, 34.0349 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:23:30Z\",\n    \"avg_ns\": 43241747515,\n    \"stddev_ns\": 193787240,\n    \"avg_ts\": 11.840568,\n    \"stddev_ts\": 0.053201,\n    \"samples_ns\": [ 43017981794, 43353273091, 43353987661 ],\n    \"samples_ts\": [ 11.902, 11.81, 11.8098 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:23:15Z",
+          "avg_ns": 3759167020,
+          "stddev_ns": 1471057,
+          "avg_ts": 34.0501,
+          "stddev_ts": 0.013299,
+          "samples_ns": [
+            3758127175,
+            3758526972,
+            3760846915
+          ],
+          "samples_ts": [
+            34.0595,
+            34.0559,
+            34.0349
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:23:30Z",
+          "avg_ns": 43241747515,
+          "stddev_ns": 193787240,
+          "avg_ts": 11.840568,
+          "stddev_ts": 0.053201,
+          "samples_ns": [
+            43017981794,
+            43353273091,
+            43353987661
+          ],
+          "samples_ts": [
+            11.902,
+            11.81,
+            11.8098
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 509
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:27:13.607912+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:25:40Z\",\n    \"avg_ns\": 15099094187,\n    \"stddev_ns\": 2997384,\n    \"avg_ts\": 33.909320,\n    \"stddev_ts\": 0.006727,\n    \"samples_ns\": [ 15100711804, 15095638361, 15100932397 ],\n    \"samples_ts\": [ 33.9057, 33.9171, 33.9052 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:26:41Z\",\n    \"avg_ns\": 10687516619,\n    \"stddev_ns\": 90657097,\n    \"avg_ts\": 11.977167,\n    \"stddev_ts\": 0.102087,\n    \"samples_ns\": [ 10583055803, 10745633625, 10733860430 ],\n    \"samples_ts\": [ 12.0948, 11.9118, 11.9249 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:25:40Z",
+          "avg_ns": 15099094187,
+          "stddev_ns": 2997384,
+          "avg_ts": 33.90932,
+          "stddev_ts": 0.006727,
+          "samples_ns": [
+            15100711804,
+            15095638361,
+            15100932397
+          ],
+          "samples_ts": [
+            33.9057,
+            33.9171,
+            33.9052
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:26:41Z",
+          "avg_ns": 10687516619,
+          "stddev_ns": 90657097,
+          "avg_ts": 11.977167,
+          "stddev_ts": 0.102087,
+          "samples_ns": [
+            10583055803,
+            10745633625,
+            10733860430
+          ],
+          "samples_ts": [
+            12.0948,
+            11.9118,
+            11.9249
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 510
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:30:24.425019+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:27:14Z\",\n    \"avg_ns\": 15091867820,\n    \"stddev_ns\": 4055514,\n    \"avg_ts\": 33.925557,\n    \"stddev_ts\": 0.009111,\n    \"samples_ns\": [ 15096548563, 15089517683, 15089537215 ],\n    \"samples_ts\": [ 33.915, 33.9308, 33.9308 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:28:14Z\",\n    \"avg_ns\": 43125554906,\n    \"stddev_ns\": 82263932,\n    \"avg_ts\": 11.872340,\n    \"stddev_ts\": 0.022672,\n    \"samples_ns\": [ 43030601914, 43170738280, 43175324525 ],\n    \"samples_ts\": [ 11.8985, 11.8599, 11.8586 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:27:14Z",
+          "avg_ns": 15091867820,
+          "stddev_ns": 4055514,
+          "avg_ts": 33.925557,
+          "stddev_ts": 0.009111,
+          "samples_ns": [
+            15096548563,
+            15089517683,
+            15089537215
+          ],
+          "samples_ts": [
+            33.915,
+            33.9308,
+            33.9308
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:28:14Z",
+          "avg_ns": 43125554906,
+          "stddev_ns": 82263932,
+          "avg_ts": 11.87234,
+          "stddev_ts": 0.022672,
+          "samples_ns": [
+            43030601914,
+            43170738280,
+            43175324525
+          ],
+          "samples_ts": [
+            11.8985,
+            11.8599,
+            11.8586
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 511
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:31:12.187492+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:30:25Z\",\n    \"avg_ns\": 3763017598,\n    \"stddev_ns\": 724804,\n    \"avg_ts\": 34.015255,\n    \"stddev_ts\": 0.006528,\n    \"samples_ns\": [ 3762399586, 3762841708, 3763811501 ],\n    \"samples_ts\": [ 34.0208, 34.0168, 34.0081 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:30:40Z\",\n    \"avg_ns\": 10533892964,\n    \"stddev_ns\": 151110240,\n    \"avg_ts\": 12.152933,\n    \"stddev_ts\": 0.175690,\n    \"samples_ns\": [ 10360797969, 10639490032, 10601390892 ],\n    \"samples_ts\": [ 12.3543, 12.0307, 12.0739 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:30:25Z",
+          "avg_ns": 3763017598,
+          "stddev_ns": 724804,
+          "avg_ts": 34.015255,
+          "stddev_ts": 0.006528,
+          "samples_ns": [
+            3762399586,
+            3762841708,
+            3763811501
+          ],
+          "samples_ts": [
+            34.0208,
+            34.0168,
+            34.0081
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:30:40Z",
+          "avg_ns": 10533892964,
+          "stddev_ns": 151110240,
+          "avg_ts": 12.152933,
+          "stddev_ts": 0.17569,
+          "samples_ns": [
+            10360797969,
+            10639490032,
+            10601390892
+          ],
+          "samples_ts": [
+            12.3543,
+            12.0307,
+            12.0739
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 512
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:33:36.631573+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:31:13Z\",\n    \"avg_ns\": 3762216282,\n    \"stddev_ns\": 554987,\n    \"avg_ts\": 34.022500,\n    \"stddev_ts\": 0.004988,\n    \"samples_ns\": [ 3761643993, 3762744528, 3762260326 ],\n    \"samples_ts\": [ 34.0277, 34.0177, 34.0221 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:31:28Z\",\n    \"avg_ns\": 42774431714,\n    \"stddev_ns\": 216622886,\n    \"avg_ts\": 11.969972,\n    \"stddev_ts\": 0.060446,\n    \"samples_ns\": [ 42636784940, 43024129246, 42662380957 ],\n    \"samples_ts\": [ 12.0084, 11.9003, 12.0012 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:31:13Z",
+          "avg_ns": 3762216282,
+          "stddev_ns": 554987,
+          "avg_ts": 34.0225,
+          "stddev_ts": 0.004988,
+          "samples_ns": [
+            3761643993,
+            3762744528,
+            3762260326
+          ],
+          "samples_ts": [
+            34.0277,
+            34.0177,
+            34.0221
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:31:28Z",
+          "avg_ns": 42774431714,
+          "stddev_ns": 216622886,
+          "avg_ts": 11.969972,
+          "stddev_ts": 0.060446,
+          "samples_ns": [
+            42636784940,
+            43024129246,
+            42662380957
+          ],
+          "samples_ts": [
+            12.0084,
+            11.9003,
+            12.0012
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 513
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:35:10.813259+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:33:37Z\",\n    \"avg_ns\": 15290229982,\n    \"stddev_ns\": 2901894,\n    \"avg_ts\": 33.485436,\n    \"stddev_ts\": 0.006350,\n    \"samples_ns\": [ 15292800905, 15287087515, 15290801527 ],\n    \"samples_ts\": [ 33.4798, 33.4923, 33.4842 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:34:38Z\",\n    \"avg_ns\": 10650387508,\n    \"stddev_ns\": 171959139,\n    \"avg_ts\": 12.020449,\n    \"stddev_ts\": 0.195775,\n    \"samples_ns\": [ 10453462445, 10770880121, 10726819959 ],\n    \"samples_ts\": [ 12.2447, 11.8839, 11.9327 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:33:37Z",
+          "avg_ns": 15290229982,
+          "stddev_ns": 2901894,
+          "avg_ts": 33.485436,
+          "stddev_ts": 0.00635,
+          "samples_ns": [
+            15292800905,
+            15287087515,
+            15290801527
+          ],
+          "samples_ts": [
+            33.4798,
+            33.4923,
+            33.4842
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:34:38Z",
+          "avg_ns": 10650387508,
+          "stddev_ns": 171959139,
+          "avg_ts": 12.020449,
+          "stddev_ts": 0.195775,
+          "samples_ns": [
+            10453462445,
+            10770880121,
+            10726819959
+          ],
+          "samples_ts": [
+            12.2447,
+            11.8839,
+            11.9327
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 514
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:38:22.260746+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:35:11Z\",\n    \"avg_ns\": 15280901507,\n    \"stddev_ns\": 3133968,\n    \"avg_ts\": 33.505878,\n    \"stddev_ts\": 0.006872,\n    \"samples_ns\": [ 15282247714, 15277319357, 15283137450 ],\n    \"samples_ts\": [ 33.5029, 33.5137, 33.501 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:36:12Z\",\n    \"avg_ns\": 43084634219,\n    \"stddev_ns\": 80304656,\n    \"avg_ts\": 11.883615,\n    \"stddev_ts\": 0.022143,\n    \"samples_ns\": [ 43076259600, 43168797738, 43008845320 ],\n    \"samples_ts\": [ 11.8859, 11.8604, 11.9045 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:35:11Z",
+          "avg_ns": 15280901507,
+          "stddev_ns": 3133968,
+          "avg_ts": 33.505878,
+          "stddev_ts": 0.006872,
+          "samples_ns": [
+            15282247714,
+            15277319357,
+            15283137450
+          ],
+          "samples_ts": [
+            33.5029,
+            33.5137,
+            33.501
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:36:12Z",
+          "avg_ns": 43084634219,
+          "stddev_ns": 80304656,
+          "avg_ts": 11.883615,
+          "stddev_ts": 0.022143,
+          "samples_ns": [
+            43076259600,
+            43168797738,
+            43008845320
+          ],
+          "samples_ts": [
+            11.8859,
+            11.8604,
+            11.9045
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 515
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:39:09.969951+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:38:23Z\",\n    \"avg_ns\": 3764829171,\n    \"stddev_ns\": 2506164,\n    \"avg_ts\": 33.998897,\n    \"stddev_ts\": 0.022610,\n    \"samples_ns\": [ 3763429310, 3763337386, 3767720819 ],\n    \"samples_ts\": [ 34.0115, 34.0124, 33.9728 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:38:38Z\",\n    \"avg_ns\": 10531369681,\n    \"stddev_ns\": 162725865,\n    \"avg_ts\": 12.156103,\n    \"stddev_ts\": 0.188192,\n    \"samples_ns\": [ 10362644431, 10544118562, 10687346052 ],\n    \"samples_ts\": [ 12.3521, 12.1395, 11.9768 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:38:23Z",
+          "avg_ns": 3764829171,
+          "stddev_ns": 2506164,
+          "avg_ts": 33.998897,
+          "stddev_ts": 0.02261,
+          "samples_ns": [
+            3763429310,
+            3763337386,
+            3767720819
+          ],
+          "samples_ts": [
+            34.0115,
+            34.0124,
+            33.9728
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:38:38Z",
+          "avg_ns": 10531369681,
+          "stddev_ns": 162725865,
+          "avg_ts": 12.156103,
+          "stddev_ts": 0.188192,
+          "samples_ns": [
+            10362644431,
+            10544118562,
+            10687346052
+          ],
+          "samples_ts": [
+            12.3521,
+            12.1395,
+            11.9768
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 516
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:41:33.916545+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:39:10Z\",\n    \"avg_ns\": 3762003750,\n    \"stddev_ns\": 694797,\n    \"avg_ts\": 34.024422,\n    \"stddev_ts\": 0.006284,\n    \"samples_ns\": [ 3761962031, 3762718467, 3761330752 ],\n    \"samples_ts\": [ 34.0248, 34.018, 34.0305 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:39:25Z\",\n    \"avg_ns\": 42610134670,\n    \"stddev_ns\": 220892511,\n    \"avg_ts\": 12.016136,\n    \"stddev_ts\": 0.062317,\n    \"samples_ns\": [ 42383917388, 42621200566, 42825286058 ],\n    \"samples_ts\": [ 12.0801, 12.0128, 11.9556 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:39:10Z",
+          "avg_ns": 3762003750,
+          "stddev_ns": 694797,
+          "avg_ts": 34.024422,
+          "stddev_ts": 0.006284,
+          "samples_ns": [
+            3761962031,
+            3762718467,
+            3761330752
+          ],
+          "samples_ts": [
+            34.0248,
+            34.018,
+            34.0305
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:39:25Z",
+          "avg_ns": 42610134670,
+          "stddev_ns": 220892511,
+          "avg_ts": 12.016136,
+          "stddev_ts": 0.062317,
+          "samples_ns": [
+            42383917388,
+            42621200566,
+            42825286058
+          ],
+          "samples_ts": [
+            12.0801,
+            12.0128,
+            11.9556
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 517
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:43:07.242755+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:41:34Z\",\n    \"avg_ns\": 15082202265,\n    \"stddev_ns\": 5440786,\n    \"avg_ts\": 33.947300,\n    \"stddev_ts\": 0.012241,\n    \"samples_ns\": [ 15088477526, 15078834629, 15079294641 ],\n    \"samples_ts\": [ 33.9332, 33.9549, 33.9538 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:42:35Z\",\n    \"avg_ns\": 10646438788,\n    \"stddev_ns\": 176979038,\n    \"avg_ts\": 12.025036,\n    \"stddev_ts\": 0.201832,\n    \"samples_ns\": [ 10442089835, 10746973901, 10750252630 ],\n    \"samples_ts\": [ 12.2581, 11.9103, 11.9067 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:41:34Z",
+          "avg_ns": 15082202265,
+          "stddev_ns": 5440786,
+          "avg_ts": 33.9473,
+          "stddev_ts": 0.012241,
+          "samples_ns": [
+            15088477526,
+            15078834629,
+            15079294641
+          ],
+          "samples_ts": [
+            33.9332,
+            33.9549,
+            33.9538
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:42:35Z",
+          "avg_ns": 10646438788,
+          "stddev_ns": 176979038,
+          "avg_ts": 12.025036,
+          "stddev_ts": 0.201832,
+          "samples_ns": [
+            10442089835,
+            10746973901,
+            10750252630
+          ],
+          "samples_ts": [
+            12.2581,
+            11.9103,
+            11.9067
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 518
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:46:18.315048+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:43:08Z\",\n    \"avg_ns\": 15086584533,\n    \"stddev_ns\": 1441814,\n    \"avg_ts\": 33.937436,\n    \"stddev_ts\": 0.003232,\n    \"samples_ns\": [ 15087522767, 15084930709, 15087300124 ],\n    \"samples_ts\": [ 33.9353, 33.9412, 33.9358 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:44:08Z\",\n    \"avg_ns\": 43220025926,\n    \"stddev_ns\": 129733850,\n    \"avg_ts\": 11.846432,\n    \"stddev_ts\": 0.035621,\n    \"samples_ns\": [ 43070322037, 43299614017, 43290141724 ],\n    \"samples_ts\": [ 11.8875, 11.8246, 11.8272 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:43:08Z",
+          "avg_ns": 15086584533,
+          "stddev_ns": 1441814,
+          "avg_ts": 33.937436,
+          "stddev_ts": 0.003232,
+          "samples_ns": [
+            15087522767,
+            15084930709,
+            15087300124
+          ],
+          "samples_ts": [
+            33.9353,
+            33.9412,
+            33.9358
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:44:08Z",
+          "avg_ns": 43220025926,
+          "stddev_ns": 129733850,
+          "avg_ts": 11.846432,
+          "stddev_ts": 0.035621,
+          "samples_ns": [
+            43070322037,
+            43299614017,
+            43290141724
+          ],
+          "samples_ts": [
+            11.8875,
+            11.8246,
+            11.8272
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 519
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:47:06.318002+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:46:19Z\",\n    \"avg_ns\": 3762632524,\n    \"stddev_ns\": 566078,\n    \"avg_ts\": 34.018736,\n    \"stddev_ts\": 0.005118,\n    \"samples_ns\": [ 3762012270, 3763121282, 3762764020 ],\n    \"samples_ts\": [ 34.0243, 34.0143, 34.0175 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:46:34Z\",\n    \"avg_ns\": 10631235472,\n    \"stddev_ns\": 144000615,\n    \"avg_ts\": 12.041477,\n    \"stddev_ts\": 0.164388,\n    \"samples_ns\": [ 10464960445, 10713580401, 10715165571 ],\n    \"samples_ts\": [ 12.2313, 11.9475, 11.9457 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:46:19Z",
+          "avg_ns": 3762632524,
+          "stddev_ns": 566078,
+          "avg_ts": 34.018736,
+          "stddev_ts": 0.005118,
+          "samples_ns": [
+            3762012270,
+            3763121282,
+            3762764020
+          ],
+          "samples_ts": [
+            34.0243,
+            34.0143,
+            34.0175
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:46:34Z",
+          "avg_ns": 10631235472,
+          "stddev_ns": 144000615,
+          "avg_ts": 12.041477,
+          "stddev_ts": 0.164388,
+          "samples_ns": [
+            10464960445,
+            10713580401,
+            10715165571
+          ],
+          "samples_ts": [
+            12.2313,
+            11.9475,
+            11.9457
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 520
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:49:32.379709+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:47:07Z\",\n    \"avg_ns\": 3759762027,\n    \"stddev_ns\": 2889908,\n    \"avg_ts\": 34.044721,\n    \"stddev_ts\": 0.026146,\n    \"samples_ns\": [ 3757718575, 3758500713, 3763066795 ],\n    \"samples_ts\": [ 34.0632, 34.0561, 34.0148 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:47:22Z\",\n    \"avg_ns\": 43311623304,\n    \"stddev_ns\": 144272667,\n    \"avg_ts\": 11.821395,\n    \"stddev_ts\": 0.039452,\n    \"samples_ns\": [ 43145437460, 43404778938, 43384653515 ],\n    \"samples_ts\": [ 11.8668, 11.7959, 11.8014 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:47:07Z",
+          "avg_ns": 3759762027,
+          "stddev_ns": 2889908,
+          "avg_ts": 34.044721,
+          "stddev_ts": 0.026146,
+          "samples_ns": [
+            3757718575,
+            3758500713,
+            3763066795
+          ],
+          "samples_ts": [
+            34.0632,
+            34.0561,
+            34.0148
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:47:22Z",
+          "avg_ns": 43311623304,
+          "stddev_ns": 144272667,
+          "avg_ts": 11.821395,
+          "stddev_ts": 0.039452,
+          "samples_ns": [
+            43145437460,
+            43404778938,
+            43384653515
+          ],
+          "samples_ts": [
+            11.8668,
+            11.7959,
+            11.8014
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 521
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:51:06.036766+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:49:33Z\",\n    \"avg_ns\": 15111606763,\n    \"stddev_ns\": 610246,\n    \"avg_ts\": 33.881242,\n    \"stddev_ts\": 0.001368,\n    \"samples_ns\": [ 15112219104, 15111602553, 15110998632 ],\n    \"samples_ts\": [ 33.8799, 33.8813, 33.8826 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:50:33Z\",\n    \"avg_ns\": 10710884090,\n    \"stddev_ns\": 65896454,\n    \"avg_ts\": 11.950763,\n    \"stddev_ts\": 0.073787,\n    \"samples_ns\": [ 10634794728, 10749301217, 10748556326 ],\n    \"samples_ts\": [ 12.036, 11.9078, 11.9086 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:49:33Z",
+          "avg_ns": 15111606763,
+          "stddev_ns": 610246,
+          "avg_ts": 33.881242,
+          "stddev_ts": 0.001368,
+          "samples_ns": [
+            15112219104,
+            15111602553,
+            15110998632
+          ],
+          "samples_ts": [
+            33.8799,
+            33.8813,
+            33.8826
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:50:33Z",
+          "avg_ns": 10710884090,
+          "stddev_ns": 65896454,
+          "avg_ts": 11.950763,
+          "stddev_ts": 0.073787,
+          "samples_ns": [
+            10634794728,
+            10749301217,
+            10748556326
+          ],
+          "samples_ts": [
+            12.036,
+            11.9078,
+            11.9086
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 522
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:54:17.269557+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:51:06Z\",\n    \"avg_ns\": 15111366432,\n    \"stddev_ns\": 1116722,\n    \"avg_ts\": 33.881781,\n    \"stddev_ts\": 0.002489,\n    \"samples_ns\": [ 15111420418, 15110230489, 15112448390 ],\n    \"samples_ts\": [ 33.8817, 33.8843, 33.8794 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:52:07Z\",\n    \"avg_ns\": 43238133889,\n    \"stddev_ns\": 94730526,\n    \"avg_ts\": 11.841437,\n    \"stddev_ts\": 0.025976,\n    \"samples_ns\": [ 43128927351, 43298144107, 43287330211 ],\n    \"samples_ts\": [ 11.8714, 11.825, 11.8279 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:51:06Z",
+          "avg_ns": 15111366432,
+          "stddev_ns": 1116722,
+          "avg_ts": 33.881781,
+          "stddev_ts": 0.002489,
+          "samples_ns": [
+            15111420418,
+            15110230489,
+            15112448390
+          ],
+          "samples_ts": [
+            33.8817,
+            33.8843,
+            33.8794
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:52:07Z",
+          "avg_ns": 43238133889,
+          "stddev_ns": 94730526,
+          "avg_ts": 11.841437,
+          "stddev_ts": 0.025976,
+          "samples_ns": [
+            43128927351,
+            43298144107,
+            43287330211
+          ],
+          "samples_ts": [
+            11.8714,
+            11.825,
+            11.8279
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 523
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:55:05.361239+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:54:18Z\",\n    \"avg_ns\": 3760245387,\n    \"stddev_ns\": 1473919,\n    \"avg_ts\": 34.040335,\n    \"stddev_ts\": 0.013317,\n    \"samples_ns\": [ 3759166532, 3761921465, 3759648166 ],\n    \"samples_ts\": [ 34.0501, 34.0252, 34.0457 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:54:33Z\",\n    \"avg_ns\": 10661019609,\n    \"stddev_ns\": 130518407,\n    \"avg_ts\": 12.007565,\n    \"stddev_ts\": 0.148050,\n    \"samples_ns\": [ 10510312274, 10735645898, 10737100655 ],\n    \"samples_ts\": [ 12.1785, 11.9229, 11.9213 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:54:18Z",
+          "avg_ns": 3760245387,
+          "stddev_ns": 1473919,
+          "avg_ts": 34.040335,
+          "stddev_ts": 0.013317,
+          "samples_ns": [
+            3759166532,
+            3761921465,
+            3759648166
+          ],
+          "samples_ts": [
+            34.0501,
+            34.0252,
+            34.0457
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:54:33Z",
+          "avg_ns": 10661019609,
+          "stddev_ns": 130518407,
+          "avg_ts": 12.007565,
+          "stddev_ts": 0.14805,
+          "samples_ns": [
+            10510312274,
+            10735645898,
+            10737100655
+          ],
+          "samples_ts": [
+            12.1785,
+            11.9229,
+            11.9213
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 524
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:57:30.457876+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:55:06Z\",\n    \"avg_ns\": 3757933549,\n    \"stddev_ns\": 1351185,\n    \"avg_ts\": 34.061276,\n    \"stddev_ts\": 0.012236,\n    \"samples_ns\": [ 3756452362, 3758254016, 3759094270 ],\n    \"samples_ts\": [ 34.0747, 34.0584, 34.0508 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:55:21Z\",\n    \"avg_ns\": 42998777287,\n    \"stddev_ns\": 278150952,\n    \"avg_ts\": 11.907649,\n    \"stddev_ts\": 0.077317,\n    \"samples_ns\": [ 42677599421, 43158162027, 43160570415 ],\n    \"samples_ts\": [ 11.9969, 11.8633, 11.8627 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:55:06Z",
+          "avg_ns": 3757933549,
+          "stddev_ns": 1351185,
+          "avg_ts": 34.061276,
+          "stddev_ts": 0.012236,
+          "samples_ns": [
+            3756452362,
+            3758254016,
+            3759094270
+          ],
+          "samples_ts": [
+            34.0747,
+            34.0584,
+            34.0508
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:55:21Z",
+          "avg_ns": 42998777287,
+          "stddev_ns": 278150952,
+          "avg_ts": 11.907649,
+          "stddev_ts": 0.077317,
+          "samples_ns": [
+            42677599421,
+            43158162027,
+            43160570415
+          ],
+          "samples_ts": [
+            11.9969,
+            11.8633,
+            11.8627
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 525
+    },
+    {
+      "timestamp_utc": "2025-12-09T04:59:04.674708+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:57:31Z\",\n    \"avg_ns\": 15274243572,\n    \"stddev_ns\": 724192,\n    \"avg_ts\": 33.520482,\n    \"stddev_ts\": 0.001542,\n    \"samples_ns\": [ 15273435021, 15274587942, 15274707755 ],\n    \"samples_ts\": [ 33.5223, 33.5197, 33.5195 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:58:32Z\",\n    \"avg_ns\": 10666558945,\n    \"stddev_ns\": 182912866,\n    \"avg_ts\": 12.002498,\n    \"stddev_ts\": 0.207879,\n    \"samples_ns\": [ 10455354987, 10773488086, 10770833764 ],\n    \"samples_ts\": [ 12.2425, 11.881, 11.8839 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:57:31Z",
+          "avg_ns": 15274243572,
+          "stddev_ns": 724192,
+          "avg_ts": 33.520482,
+          "stddev_ts": 0.001542,
+          "samples_ns": [
+            15273435021,
+            15274587942,
+            15274707755
+          ],
+          "samples_ts": [
+            33.5223,
+            33.5197,
+            33.5195
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:58:32Z",
+          "avg_ns": 10666558945,
+          "stddev_ns": 182912866,
+          "avg_ts": 12.002498,
+          "stddev_ts": 0.207879,
+          "samples_ns": [
+            10455354987,
+            10773488086,
+            10770833764
+          ],
+          "samples_ts": [
+            12.2425,
+            11.881,
+            11.8839
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 526
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:02:16.262563+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T04:59:05Z\",\n    \"avg_ns\": 15278339741,\n    \"stddev_ns\": 1949914,\n    \"avg_ts\": 33.511495,\n    \"stddev_ts\": 0.004277,\n    \"samples_ns\": [ 15280587497, 15277102455, 15277329271 ],\n    \"samples_ts\": [ 33.5066, 33.5142, 33.5137 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:00:06Z\",\n    \"avg_ns\": 43134987224,\n    \"stddev_ns\": 55052928,\n    \"avg_ts\": 11.869728,\n    \"stddev_ts\": 0.015148,\n    \"samples_ns\": [ 43081013713, 43191059543, 43132888416 ],\n    \"samples_ts\": [ 11.8846, 11.8543, 11.8703 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T04:59:05Z",
+          "avg_ns": 15278339741,
+          "stddev_ns": 1949914,
+          "avg_ts": 33.511495,
+          "stddev_ts": 0.004277,
+          "samples_ns": [
+            15280587497,
+            15277102455,
+            15277329271
+          ],
+          "samples_ts": [
+            33.5066,
+            33.5142,
+            33.5137
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:00:06Z",
+          "avg_ns": 43134987224,
+          "stddev_ns": 55052928,
+          "avg_ts": 11.869728,
+          "stddev_ts": 0.015148,
+          "samples_ns": [
+            43081013713,
+            43191059543,
+            43132888416
+          ],
+          "samples_ts": [
+            11.8846,
+            11.8543,
+            11.8703
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 527
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:03:03.858227+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:02:17Z\",\n    \"avg_ns\": 3759217820,\n    \"stddev_ns\": 392828,\n    \"avg_ts\": 34.049637,\n    \"stddev_ts\": 0.003471,\n    \"samples_ns\": [ 3759473192, 3759403001, 3758777269 ],\n    \"samples_ts\": [ 34.0473, 34.048, 34.0536 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:02:32Z\",\n    \"avg_ns\": 10477855518,\n    \"stddev_ns\": 164447978,\n    \"avg_ts\": 12.218232,\n    \"stddev_ts\": 0.190321,\n    \"samples_ns\": [ 10353416397, 10415860365, 10664289794 ],\n    \"samples_ts\": [ 12.3631, 12.289, 12.0027 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:02:17Z",
+          "avg_ns": 3759217820,
+          "stddev_ns": 392828,
+          "avg_ts": 34.049637,
+          "stddev_ts": 0.003471,
+          "samples_ns": [
+            3759473192,
+            3759403001,
+            3758777269
+          ],
+          "samples_ts": [
+            34.0473,
+            34.048,
+            34.0536
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:02:32Z",
+          "avg_ns": 10477855518,
+          "stddev_ns": 164447978,
+          "avg_ts": 12.218232,
+          "stddev_ts": 0.190321,
+          "samples_ns": [
+            10353416397,
+            10415860365,
+            10664289794
+          ],
+          "samples_ts": [
+            12.3631,
+            12.289,
+            12.0027
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 528
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:05:27.207370+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:03:04Z\",\n    \"avg_ns\": 3762337247,\n    \"stddev_ns\": 924120,\n    \"avg_ts\": 34.021407,\n    \"stddev_ts\": 0.008320,\n    \"samples_ns\": [ 3763214355, 3761379573, 3762417815 ],\n    \"samples_ts\": [ 34.0135, 34.0301, 34.0207 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:03:19Z\",\n    \"avg_ns\": 42413161674,\n    \"stddev_ns\": 298294811,\n    \"avg_ts\": 12.072122,\n    \"stddev_ts\": 0.084815,\n    \"samples_ns\": [ 42131476031, 42382337103, 42725671888 ],\n    \"samples_ts\": [ 12.1524, 12.0805, 11.9834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:03:04Z",
+          "avg_ns": 3762337247,
+          "stddev_ns": 924120,
+          "avg_ts": 34.021407,
+          "stddev_ts": 0.00832,
+          "samples_ns": [
+            3763214355,
+            3761379573,
+            3762417815
+          ],
+          "samples_ts": [
+            34.0135,
+            34.0301,
+            34.0207
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:03:19Z",
+          "avg_ns": 42413161674,
+          "stddev_ns": 298294811,
+          "avg_ts": 12.072122,
+          "stddev_ts": 0.084815,
+          "samples_ns": [
+            42131476031,
+            42382337103,
+            42725671888
+          ],
+          "samples_ts": [
+            12.1524,
+            12.0805,
+            11.9834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 529
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:07:00.399062+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:05:28Z\",\n    \"avg_ns\": 15081336354,\n    \"stddev_ns\": 3347948,\n    \"avg_ts\": 33.949247,\n    \"stddev_ts\": 0.007532,\n    \"samples_ns\": [ 15084615178, 15081466310, 15077927575 ],\n    \"samples_ts\": [ 33.9419, 33.949, 33.9569 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:06:28Z\",\n    \"avg_ns\": 10602291688,\n    \"stddev_ns\": 212270070,\n    \"avg_ts\": 12.076111,\n    \"stddev_ts\": 0.243478,\n    \"samples_ns\": [ 10369017332, 10653768546, 10784089188 ],\n    \"samples_ts\": [ 12.3445, 12.0145, 11.8693 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:05:28Z",
+          "avg_ns": 15081336354,
+          "stddev_ns": 3347948,
+          "avg_ts": 33.949247,
+          "stddev_ts": 0.007532,
+          "samples_ns": [
+            15084615178,
+            15081466310,
+            15077927575
+          ],
+          "samples_ts": [
+            33.9419,
+            33.949,
+            33.9569
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:06:28Z",
+          "avg_ns": 10602291688,
+          "stddev_ns": 212270070,
+          "avg_ts": 12.076111,
+          "stddev_ts": 0.243478,
+          "samples_ns": [
+            10369017332,
+            10653768546,
+            10784089188
+          ],
+          "samples_ts": [
+            12.3445,
+            12.0145,
+            11.8693
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 530
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:10:10.534263+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:07:01Z\",\n    \"avg_ns\": 15081739491,\n    \"stddev_ns\": 1760383,\n    \"avg_ts\": 33.948339,\n    \"stddev_ts\": 0.003943,\n    \"samples_ns\": [ 15080836652, 15083758536, 15080623287 ],\n    \"samples_ts\": [ 33.9504, 33.9438, 33.9509 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:08:01Z\",\n    \"avg_ns\": 42914346916,\n    \"stddev_ns\": 367541821,\n    \"avg_ts\": 11.931328,\n    \"stddev_ts\": 0.102672,\n    \"samples_ns\": [ 43089311562, 43161718942, 42492010246 ],\n    \"samples_ts\": [ 11.8823, 11.8624, 12.0493 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:07:01Z",
+          "avg_ns": 15081739491,
+          "stddev_ns": 1760383,
+          "avg_ts": 33.948339,
+          "stddev_ts": 0.003943,
+          "samples_ns": [
+            15080836652,
+            15083758536,
+            15080623287
+          ],
+          "samples_ts": [
+            33.9504,
+            33.9438,
+            33.9509
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:08:01Z",
+          "avg_ns": 42914346916,
+          "stddev_ns": 367541821,
+          "avg_ts": 11.931328,
+          "stddev_ts": 0.102672,
+          "samples_ns": [
+            43089311562,
+            43161718942,
+            42492010246
+          ],
+          "samples_ts": [
+            11.8823,
+            11.8624,
+            12.0493
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 531
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:10:57.819355+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:10:11Z\",\n    \"avg_ns\": 3772660782,\n    \"stddev_ns\": 16797066,\n    \"avg_ts\": 33.928756,\n    \"stddev_ts\": 0.150677,\n    \"samples_ns\": [ 3792035224, 3763755173, 3762191950 ],\n    \"samples_ts\": [ 33.755, 34.0086, 34.0227 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:10:26Z\",\n    \"avg_ns\": 10363949865,\n    \"stddev_ns\": 45247028,\n    \"avg_ts\": 12.350660,\n    \"stddev_ts\": 0.053786,\n    \"samples_ns\": [ 10339309925, 10336370604, 10416169066 ],\n    \"samples_ts\": [ 12.3799, 12.3835, 12.2886 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:10:11Z",
+          "avg_ns": 3772660782,
+          "stddev_ns": 16797066,
+          "avg_ts": 33.928756,
+          "stddev_ts": 0.150677,
+          "samples_ns": [
+            3792035224,
+            3763755173,
+            3762191950
+          ],
+          "samples_ts": [
+            33.755,
+            34.0086,
+            34.0227
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:10:26Z",
+          "avg_ns": 10363949865,
+          "stddev_ns": 45247028,
+          "avg_ts": 12.35066,
+          "stddev_ts": 0.053786,
+          "samples_ns": [
+            10339309925,
+            10336370604,
+            10416169066
+          ],
+          "samples_ts": [
+            12.3799,
+            12.3835,
+            12.2886
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 532
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:13:21.696429+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:10:58Z\",\n    \"avg_ns\": 3759815322,\n    \"stddev_ns\": 416733,\n    \"avg_ts\": 34.044226,\n    \"stddev_ts\": 0.003732,\n    \"samples_ns\": [ 3760263025, 3759451547, 3759731395 ],\n    \"samples_ts\": [ 34.0402, 34.0475, 34.045 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:11:13Z\",\n    \"avg_ns\": 42590016430,\n    \"stddev_ns\": 3060016973,\n    \"avg_ts\": 12.022217,\n    \"stddev_ts\": 0.105799,\n    \"samples_ns\": [ 42208762938, 42603679657, 42957606697 ],\n    \"samples_ts\": [ 12.1302, 12.0177, 11.9187 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:10:58Z",
+          "avg_ns": 3759815322,
+          "stddev_ns": 416733,
+          "avg_ts": 34.044226,
+          "stddev_ts": 0.003732,
+          "samples_ns": [
+            3760263025,
+            3759451547,
+            3759731395
+          ],
+          "samples_ts": [
+            34.0402,
+            34.0475,
+            34.045
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:11:13Z",
+          "avg_ns": 42590016430,
+          "stddev_ns": 3060016973,
+          "avg_ts": 12.022217,
+          "stddev_ts": 0.105799,
+          "samples_ns": [
+            42208762938,
+            42603679657,
+            42957606697
+          ],
+          "samples_ts": [
+            12.1302,
+            12.0177,
+            11.9187
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 533
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:14:55.197399+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:13:22Z\",\n    \"avg_ns\": 15100824608,\n    \"stddev_ns\": 2851873,\n    \"avg_ts\": 33.905434,\n    \"stddev_ts\": 0.006398,\n    \"samples_ns\": [ 15101511103, 15097694853, 15103267869 ],\n    \"samples_ts\": [ 33.9039, 33.9125, 33.8999 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:14:23Z\",\n    \"avg_ns\": 10676888726,\n    \"stddev_ns\": 81842815,\n    \"avg_ts\": 11.988983,\n    \"stddev_ts\": 0.092306,\n    \"samples_ns\": [ 10582466391, 10720699378, 10727500409 ],\n    \"samples_ts\": [ 12.0955, 11.9395, 11.932 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:13:22Z",
+          "avg_ns": 15100824608,
+          "stddev_ns": 2851873,
+          "avg_ts": 33.905434,
+          "stddev_ts": 0.006398,
+          "samples_ns": [
+            15101511103,
+            15097694853,
+            15103267869
+          ],
+          "samples_ts": [
+            33.9039,
+            33.9125,
+            33.8999
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:14:23Z",
+          "avg_ns": 10676888726,
+          "stddev_ns": 81842815,
+          "avg_ts": 11.988983,
+          "stddev_ts": 0.092306,
+          "samples_ns": [
+            10582466391,
+            10720699378,
+            10727500409
+          ],
+          "samples_ts": [
+            12.0955,
+            11.9395,
+            11.932
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 534
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:18:06.241632+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:14:56Z\",\n    \"avg_ns\": 15098116843,\n    \"stddev_ns\": 1392580,\n    \"avg_ts\": 33.911514,\n    \"stddev_ts\": 0.003128,\n    \"samples_ns\": [ 15097842481, 15099626185, 15096881863 ],\n    \"samples_ts\": [ 33.9121, 33.9081, 33.9143 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:15:56Z\",\n    \"avg_ns\": 43189687554,\n    \"stddev_ns\": 84840601,\n    \"avg_ts\": 11.854712,\n    \"stddev_ts\": 0.023311,\n    \"samples_ns\": [ 43092849454, 43250937351, 43225275859 ],\n    \"samples_ts\": [ 11.8813, 11.8379, 11.8449 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:14:56Z",
+          "avg_ns": 15098116843,
+          "stddev_ns": 1392580,
+          "avg_ts": 33.911514,
+          "stddev_ts": 0.003128,
+          "samples_ns": [
+            15097842481,
+            15099626185,
+            15096881863
+          ],
+          "samples_ts": [
+            33.9121,
+            33.9081,
+            33.9143
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:15:56Z",
+          "avg_ns": 43189687554,
+          "stddev_ns": 84840601,
+          "avg_ts": 11.854712,
+          "stddev_ts": 0.023311,
+          "samples_ns": [
+            43092849454,
+            43250937351,
+            43225275859
+          ],
+          "samples_ts": [
+            11.8813,
+            11.8379,
+            11.8449
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 535
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:18:54.402250+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:18:07Z\",\n    \"avg_ns\": 3759869937,\n    \"stddev_ns\": 1358863,\n    \"avg_ts\": 34.043734,\n    \"stddev_ts\": 0.012290,\n    \"samples_ns\": [ 3758718601, 3759524428, 3761366783 ],\n    \"samples_ts\": [ 34.0542, 34.0469, 34.0302 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:18:22Z\",\n    \"avg_ns\": 10682389795,\n    \"stddev_ns\": 155059248,\n    \"avg_ts\": 11.984034,\n    \"stddev_ts\": 0.175327,\n    \"samples_ns\": [ 10504683144, 10752305874, 10790180368 ],\n    \"samples_ts\": [ 12.185, 11.9044, 11.8626 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:18:07Z",
+          "avg_ns": 3759869937,
+          "stddev_ns": 1358863,
+          "avg_ts": 34.043734,
+          "stddev_ts": 0.01229,
+          "samples_ns": [
+            3758718601,
+            3759524428,
+            3761366783
+          ],
+          "samples_ts": [
+            34.0542,
+            34.0469,
+            34.0302
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:18:22Z",
+          "avg_ns": 10682389795,
+          "stddev_ns": 155059248,
+          "avg_ts": 11.984034,
+          "stddev_ts": 0.175327,
+          "samples_ns": [
+            10504683144,
+            10752305874,
+            10790180368
+          ],
+          "samples_ts": [
+            12.185,
+            11.9044,
+            11.8626
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 536
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:21:19.216400+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:18:55Z\",\n    \"avg_ns\": 3763140115,\n    \"stddev_ns\": 469666,\n    \"avg_ts\": 34.014147,\n    \"stddev_ts\": 0.004209,\n    \"samples_ns\": [ 3762820886, 3762925039, 3763674421 ],\n    \"samples_ts\": [ 34.017, 34.0161, 34.0093 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:19:10Z\",\n    \"avg_ns\": 42898808304,\n    \"stddev_ns\": 204236236,\n    \"avg_ts\": 11.935244,\n    \"stddev_ts\": 0.056902,\n    \"samples_ns\": [ 42676971740, 42940417436, 43079035738 ],\n    \"samples_ts\": [ 11.9971, 11.9235, 11.8851 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:18:55Z",
+          "avg_ns": 3763140115,
+          "stddev_ns": 469666,
+          "avg_ts": 34.014147,
+          "stddev_ts": 0.004209,
+          "samples_ns": [
+            3762820886,
+            3762925039,
+            3763674421
+          ],
+          "samples_ts": [
+            34.017,
+            34.0161,
+            34.0093
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:19:10Z",
+          "avg_ns": 42898808304,
+          "stddev_ns": 204236236,
+          "avg_ts": 11.935244,
+          "stddev_ts": 0.056902,
+          "samples_ns": [
+            42676971740,
+            42940417436,
+            43079035738
+          ],
+          "samples_ts": [
+            11.9971,
+            11.9235,
+            11.8851
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 537
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:22:53.614707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:21:20Z\",\n    \"avg_ns\": 15316823141,\n    \"stddev_ns\": 1227206,\n    \"avg_ts\": 33.427297,\n    \"stddev_ts\": 0.002678,\n    \"samples_ns\": [ 15318069326, 15316784259, 15315615838 ],\n    \"samples_ts\": [ 33.4246, 33.4274, 33.4299 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:22:21Z\",\n    \"avg_ns\": 10686383103,\n    \"stddev_ns\": 167745521,\n    \"avg_ts\": 11.979846,\n    \"stddev_ts\": 0.189760,\n    \"samples_ns\": [ 10492802958, 10777376138, 10788970213 ],\n    \"samples_ts\": [ 12.1988, 11.8767, 11.864 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:21:20Z",
+          "avg_ns": 15316823141,
+          "stddev_ns": 1227206,
+          "avg_ts": 33.427297,
+          "stddev_ts": 0.002678,
+          "samples_ns": [
+            15318069326,
+            15316784259,
+            15315615838
+          ],
+          "samples_ts": [
+            33.4246,
+            33.4274,
+            33.4299
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:22:21Z",
+          "avg_ns": 10686383103,
+          "stddev_ns": 167745521,
+          "avg_ts": 11.979846,
+          "stddev_ts": 0.18976,
+          "samples_ns": [
+            10492802958,
+            10777376138,
+            10788970213
+          ],
+          "samples_ts": [
+            12.1988,
+            11.8767,
+            11.864
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 538
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:26:05.652975+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:22:54Z\",\n    \"avg_ns\": 15321065405,\n    \"stddev_ns\": 2466756,\n    \"avg_ts\": 33.418042,\n    \"stddev_ts\": 0.005381,\n    \"samples_ns\": [ 15323272318, 15318402449, 15321521448 ],\n    \"samples_ts\": [ 33.4132, 33.4239, 33.417 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:23:55Z\",\n    \"avg_ns\": 43228220709,\n    \"stddev_ns\": 78932446,\n    \"avg_ts\": 11.844141,\n    \"stddev_ts\": 0.021650,\n    \"samples_ns\": [ 43137077697, 43273828654, 43273755777 ],\n    \"samples_ts\": [ 11.8691, 11.8316, 11.8317 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:22:54Z",
+          "avg_ns": 15321065405,
+          "stddev_ns": 2466756,
+          "avg_ts": 33.418042,
+          "stddev_ts": 0.005381,
+          "samples_ns": [
+            15323272318,
+            15318402449,
+            15321521448
+          ],
+          "samples_ts": [
+            33.4132,
+            33.4239,
+            33.417
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:23:55Z",
+          "avg_ns": 43228220709,
+          "stddev_ns": 78932446,
+          "avg_ts": 11.844141,
+          "stddev_ts": 0.02165,
+          "samples_ns": [
+            43137077697,
+            43273828654,
+            43273755777
+          ],
+          "samples_ts": [
+            11.8691,
+            11.8316,
+            11.8317
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 539
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:26:54.795629+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:26:06Z\",\n    \"avg_ns\": 2921545650,\n    \"stddev_ns\": 35050367,\n    \"avg_ts\": 43.816659,\n    \"stddev_ts\": 0.529338,\n    \"samples_ns\": [ 2881080432, 2942451200, 2941105318 ],\n    \"samples_ts\": [ 44.4278, 43.5011, 43.5211 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:26:18Z\",\n    \"avg_ns\": 12139360331,\n    \"stddev_ns\": 91029410,\n    \"avg_ts\": 10.544610,\n    \"stddev_ts\": 0.079364,\n    \"samples_ns\": [ 12036004203, 12174471123, 12207605668 ],\n    \"samples_ts\": [ 10.6348, 10.5138, 10.4853 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:26:06Z",
+          "avg_ns": 2921545650,
+          "stddev_ns": 35050367,
+          "avg_ts": 43.816659,
+          "stddev_ts": 0.529338,
+          "samples_ns": [
+            2881080432,
+            2942451200,
+            2941105318
+          ],
+          "samples_ts": [
+            44.4278,
+            43.5011,
+            43.5211
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:26:18Z",
+          "avg_ns": 12139360331,
+          "stddev_ns": 91029410,
+          "avg_ts": 10.54461,
+          "stddev_ts": 0.079364,
+          "samples_ns": [
+            12036004203,
+            12174471123,
+            12207605668
+          ],
+          "samples_ts": [
+            10.6348,
+            10.5138,
+            10.4853
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 540
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:29:34.128349+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:26:55Z\",\n    \"avg_ns\": 2915086924,\n    \"stddev_ns\": 67260496,\n    \"avg_ts\": 43.924882,\n    \"stddev_ts\": 1.000166,\n    \"samples_ns\": [ 2876510205, 2992752069, 2875998499 ],\n    \"samples_ts\": [ 44.4984, 42.77, 44.5063 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:27:07Z\",\n    \"avg_ns\": 48873802036,\n    \"stddev_ns\": 192304198,\n    \"avg_ts\": 10.476068,\n    \"stddev_ts\": 0.041191,\n    \"samples_ns\": [ 48694410578, 48850162708, 49076832824 ],\n    \"samples_ts\": [ 10.5146, 10.481, 10.4326 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:26:55Z",
+          "avg_ns": 2915086924,
+          "stddev_ns": 67260496,
+          "avg_ts": 43.924882,
+          "stddev_ts": 1.000166,
+          "samples_ns": [
+            2876510205,
+            2992752069,
+            2875998499
+          ],
+          "samples_ts": [
+            44.4984,
+            42.77,
+            44.5063
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:27:07Z",
+          "avg_ns": 48873802036,
+          "stddev_ns": 192304198,
+          "avg_ts": 10.476068,
+          "stddev_ts": 0.041191,
+          "samples_ns": [
+            48694410578,
+            48850162708,
+            49076832824
+          ],
+          "samples_ts": [
+            10.5146,
+            10.481,
+            10.4326
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 541
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:30:58.599470+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:29:35Z\",\n    \"avg_ns\": 11774553163,\n    \"stddev_ns\": 145344713,\n    \"avg_ts\": 43.488019,\n    \"stddev_ts\": 0.536614,\n    \"samples_ns\": [ 11631006155, 11921631079, 11771022257 ],\n    \"samples_ts\": [ 44.0203, 42.9471, 43.4966 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:30:22Z\",\n    \"avg_ns\": 12136082242,\n    \"stddev_ns\": 9053466,\n    \"avg_ts\": 10.547065,\n    \"stddev_ts\": 0.007866,\n    \"samples_ns\": [ 12128051175, 12145892566, 12134302986 ],\n    \"samples_ts\": [ 10.554, 10.5385, 10.5486 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:29:35Z",
+          "avg_ns": 11774553163,
+          "stddev_ns": 145344713,
+          "avg_ts": 43.488019,
+          "stddev_ts": 0.536614,
+          "samples_ns": [
+            11631006155,
+            11921631079,
+            11771022257
+          ],
+          "samples_ts": [
+            44.0203,
+            42.9471,
+            43.4966
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:30:22Z",
+          "avg_ns": 12136082242,
+          "stddev_ns": 9053466,
+          "avg_ts": 10.547065,
+          "stddev_ts": 0.007866,
+          "samples_ns": [
+            12128051175,
+            12145892566,
+            12134302986
+          ],
+          "samples_ts": [
+            10.554,
+            10.5385,
+            10.5486
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 542
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:34:12.610547+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:30:59Z\",\n    \"avg_ns\": 11547385633,\n    \"stddev_ns\": 59521618,\n    \"avg_ts\": 44.339824,\n    \"stddev_ts\": 0.227890,\n    \"samples_ns\": [ 11508777193, 11615932764, 11517446942 ],\n    \"samples_ts\": [ 44.4878, 44.0774, 44.4543 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:31:45Z\",\n    \"avg_ns\": 48924148126,\n    \"stddev_ns\": 58701448,\n    \"avg_ts\": 10.465190,\n    \"stddev_ts\": 0.012550,\n    \"samples_ns\": [ 48878276803, 48990299856, 48903867721 ],\n    \"samples_ts\": [ 10.475, 10.451, 10.4695 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:30:59Z",
+          "avg_ns": 11547385633,
+          "stddev_ns": 59521618,
+          "avg_ts": 44.339824,
+          "stddev_ts": 0.22789,
+          "samples_ns": [
+            11508777193,
+            11615932764,
+            11517446942
+          ],
+          "samples_ts": [
+            44.4878,
+            44.0774,
+            44.4543
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:31:45Z",
+          "avg_ns": 48924148126,
+          "stddev_ns": 58701448,
+          "avg_ts": 10.46519,
+          "stddev_ts": 0.01255,
+          "samples_ns": [
+            48878276803,
+            48990299856,
+            48903867721
+          ],
+          "samples_ts": [
+            10.475,
+            10.451,
+            10.4695
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 543
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:35:01.661424+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:34:13Z\",\n    \"avg_ns\": 2882360322,\n    \"stddev_ns\": 1226033,\n    \"avg_ts\": 44.408055,\n    \"stddev_ts\": 0.018889,\n    \"samples_ns\": [ 2883606143, 2881155084, 2882319739 ],\n    \"samples_ts\": [ 44.3889, 44.4266, 44.4087 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:34:25Z\",\n    \"avg_ns\": 12132645288,\n    \"stddev_ns\": 51678750,\n    \"avg_ts\": 10.550177,\n    \"stddev_ts\": 0.045041,\n    \"samples_ns\": [ 12073438821, 12168700402, 12155796642 ],\n    \"samples_ts\": [ 10.6018, 10.5188, 10.53 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:34:13Z",
+          "avg_ns": 2882360322,
+          "stddev_ns": 1226033,
+          "avg_ts": 44.408055,
+          "stddev_ts": 0.018889,
+          "samples_ns": [
+            2883606143,
+            2881155084,
+            2882319739
+          ],
+          "samples_ts": [
+            44.3889,
+            44.4266,
+            44.4087
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:34:25Z",
+          "avg_ns": 12132645288,
+          "stddev_ns": 51678750,
+          "avg_ts": 10.550177,
+          "stddev_ts": 0.045041,
+          "samples_ns": [
+            12073438821,
+            12168700402,
+            12155796642
+          ],
+          "samples_ts": [
+            10.6018,
+            10.5188,
+            10.53
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 544
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:37:41.095536+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:35:02Z\",\n    \"avg_ns\": 2880932280,\n    \"stddev_ns\": 2105819,\n    \"avg_ts\": 44.430078,\n    \"stddev_ts\": 0.032453,\n    \"samples_ns\": [ 2879510982, 2883350714, 2879935145 ],\n    \"samples_ts\": [ 44.452, 44.3928, 44.4454 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:35:14Z\",\n    \"avg_ns\": 48940081223,\n    \"stddev_ns\": 84357743,\n    \"avg_ts\": 10.461793,\n    \"stddev_ts\": 0.018049,\n    \"samples_ns\": [ 48977705438, 48999079430, 48843458803 ],\n    \"samples_ts\": [ 10.4537, 10.4492, 10.4825 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:35:02Z",
+          "avg_ns": 2880932280,
+          "stddev_ns": 2105819,
+          "avg_ts": 44.430078,
+          "stddev_ts": 0.032453,
+          "samples_ns": [
+            2879510982,
+            2883350714,
+            2879935145
+          ],
+          "samples_ts": [
+            44.452,
+            44.3928,
+            44.4454
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:35:14Z",
+          "avg_ns": 48940081223,
+          "stddev_ns": 84357743,
+          "avg_ts": 10.461793,
+          "stddev_ts": 0.018049,
+          "samples_ns": [
+            48977705438,
+            48999079430,
+            48843458803
+          ],
+          "samples_ts": [
+            10.4537,
+            10.4492,
+            10.4825
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 545
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:39:04.909471+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:37:41Z\",\n    \"avg_ns\": 11563084758,\n    \"stddev_ns\": 3935651,\n    \"avg_ts\": 44.278845,\n    \"stddev_ts\": 0.015062,\n    \"samples_ns\": [ 11564871244, 11558575914, 11565807118 ],\n    \"samples_ts\": [ 44.272, 44.2961, 44.2684 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:38:28Z\",\n    \"avg_ns\": 12163154640,\n    \"stddev_ns\": 37326917,\n    \"avg_ts\": 10.523652,\n    \"stddev_ts\": 0.032351,\n    \"samples_ns\": [ 12120245898, 12181083582, 12188134440 ],\n    \"samples_ts\": [ 10.5608, 10.5081, 10.502 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:37:41Z",
+          "avg_ns": 11563084758,
+          "stddev_ns": 3935651,
+          "avg_ts": 44.278845,
+          "stddev_ts": 0.015062,
+          "samples_ns": [
+            11564871244,
+            11558575914,
+            11565807118
+          ],
+          "samples_ts": [
+            44.272,
+            44.2961,
+            44.2684
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:38:28Z",
+          "avg_ns": 12163154640,
+          "stddev_ns": 37326917,
+          "avg_ts": 10.523652,
+          "stddev_ts": 0.032351,
+          "samples_ns": [
+            12120245898,
+            12181083582,
+            12188134440
+          ],
+          "samples_ts": [
+            10.5608,
+            10.5081,
+            10.502
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 546
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:42:19.595986+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:39:05Z\",\n    \"avg_ns\": 11640892572,\n    \"stddev_ns\": 58751538,\n    \"avg_ts\": 43.983631,\n    \"stddev_ts\": 0.222626,\n    \"samples_ns\": [ 11573144793, 11671698828, 11677834096 ],\n    \"samples_ts\": [ 44.2404, 43.8668, 43.8437 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:39:52Z\",\n    \"avg_ns\": 49021398888,\n    \"stddev_ns\": 35107013,\n    \"avg_ts\": 10.444422,\n    \"stddev_ts\": 0.007479,\n    \"samples_ns\": [ 48988660475, 49017066609, 49058469582 ],\n    \"samples_ts\": [ 10.4514, 10.4453, 10.4365 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:39:05Z",
+          "avg_ns": 11640892572,
+          "stddev_ns": 58751538,
+          "avg_ts": 43.983631,
+          "stddev_ts": 0.222626,
+          "samples_ns": [
+            11573144793,
+            11671698828,
+            11677834096
+          ],
+          "samples_ts": [
+            44.2404,
+            43.8668,
+            43.8437
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:39:52Z",
+          "avg_ns": 49021398888,
+          "stddev_ns": 35107013,
+          "avg_ts": 10.444422,
+          "stddev_ts": 0.007479,
+          "samples_ns": [
+            48988660475,
+            49017066609,
+            49058469582
+          ],
+          "samples_ts": [
+            10.4514,
+            10.4453,
+            10.4365
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 547
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:43:08.544719+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:42:20Z\",\n    \"avg_ns\": 2877966390,\n    \"stddev_ns\": 1057074,\n    \"avg_ts\": 44.475853,\n    \"stddev_ts\": 0.016296,\n    \"samples_ns\": [ 2876790769, 2878828219, 2878280184 ],\n    \"samples_ts\": [ 44.494, 44.4625, 44.471 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:42:32Z\",\n    \"avg_ns\": 12118060218,\n    \"stddev_ns\": 55400774,\n    \"avg_ts\": 10.562894,\n    \"stddev_ts\": 0.048418,\n    \"samples_ns\": [ 12054107331, 12151361353, 12148711971 ],\n    \"samples_ts\": [ 10.6188, 10.5338, 10.5361 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:42:20Z",
+          "avg_ns": 2877966390,
+          "stddev_ns": 1057074,
+          "avg_ts": 44.475853,
+          "stddev_ts": 0.016296,
+          "samples_ns": [
+            2876790769,
+            2878828219,
+            2878280184
+          ],
+          "samples_ts": [
+            44.494,
+            44.4625,
+            44.471
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:42:32Z",
+          "avg_ns": 12118060218,
+          "stddev_ns": 55400774,
+          "avg_ts": 10.562894,
+          "stddev_ts": 0.048418,
+          "samples_ns": [
+            12054107331,
+            12151361353,
+            12148711971
+          ],
+          "samples_ts": [
+            10.6188,
+            10.5338,
+            10.5361
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 548
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:45:47.774402+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:43:09Z\",\n    \"avg_ns\": 2884399472,\n    \"stddev_ns\": 1562127,\n    \"avg_ts\": 44.376664,\n    \"stddev_ts\": 0.024010,\n    \"samples_ns\": [ 2885731448, 2884784130, 2882682840 ],\n    \"samples_ts\": [ 44.3562, 44.3707, 44.4031 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:43:20Z\",\n    \"avg_ns\": 48872137761,\n    \"stddev_ns\": 61942135,\n    \"avg_ts\": 10.476328,\n    \"stddev_ts\": 0.013284,\n    \"samples_ns\": [ 48886807727, 48925427636, 48804177921 ],\n    \"samples_ts\": [ 10.4732, 10.4649, 10.4909 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:43:09Z",
+          "avg_ns": 2884399472,
+          "stddev_ns": 1562127,
+          "avg_ts": 44.376664,
+          "stddev_ts": 0.02401,
+          "samples_ns": [
+            2885731448,
+            2884784130,
+            2882682840
+          ],
+          "samples_ts": [
+            44.3562,
+            44.3707,
+            44.4031
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:43:20Z",
+          "avg_ns": 48872137761,
+          "stddev_ns": 61942135,
+          "avg_ts": 10.476328,
+          "stddev_ts": 0.013284,
+          "samples_ns": [
+            48886807727,
+            48925427636,
+            48804177921
+          ],
+          "samples_ts": [
+            10.4732,
+            10.4649,
+            10.4909
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 549
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:47:12.459269+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:45:48Z\",\n    \"avg_ns\": 11766368687,\n    \"stddev_ns\": 5252526,\n    \"avg_ts\": 43.513856,\n    \"stddev_ts\": 0.019427,\n    \"samples_ns\": [ 11767378340, 11760684625, 11771043096 ],\n    \"samples_ts\": [ 43.5101, 43.5349, 43.4966 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:46:35Z\",\n    \"avg_ns\": 12174559769,\n    \"stddev_ns\": 30444754,\n    \"avg_ts\": 10.513771,\n    \"stddev_ts\": 0.026295,\n    \"samples_ns\": [ 12143634401, 12204499576, 12175545331 ],\n    \"samples_ts\": [ 10.5405, 10.4879, 10.5129 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:45:48Z",
+          "avg_ns": 11766368687,
+          "stddev_ns": 5252526,
+          "avg_ts": 43.513856,
+          "stddev_ts": 0.019427,
+          "samples_ns": [
+            11767378340,
+            11760684625,
+            11771043096
+          ],
+          "samples_ts": [
+            43.5101,
+            43.5349,
+            43.4966
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:46:35Z",
+          "avg_ns": 12174559769,
+          "stddev_ns": 30444754,
+          "avg_ts": 10.513771,
+          "stddev_ts": 0.026295,
+          "samples_ns": [
+            12143634401,
+            12204499576,
+            12175545331
+          ],
+          "samples_ts": [
+            10.5405,
+            10.4879,
+            10.5129
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 550
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:50:27.684057+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:47:13Z\",\n    \"avg_ns\": 11765553733,\n    \"stddev_ns\": 2619860,\n    \"avg_ts\": 43.516865,\n    \"stddev_ts\": 0.009691,\n    \"samples_ns\": [ 11762557854, 11766688077, 11767415268 ],\n    \"samples_ts\": [ 43.5279, 43.5127, 43.51 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:48:00Z\",\n    \"avg_ns\": 49021606135,\n    \"stddev_ns\": 13104402,\n    \"avg_ts\": 10.444375,\n    \"stddev_ts\": 0.002792,\n    \"samples_ns\": [ 49006624660, 49027255155, 49030938590 ],\n    \"samples_ts\": [ 10.4476, 10.4432, 10.4424 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:47:13Z",
+          "avg_ns": 11765553733,
+          "stddev_ns": 2619860,
+          "avg_ts": 43.516865,
+          "stddev_ts": 0.009691,
+          "samples_ns": [
+            11762557854,
+            11766688077,
+            11767415268
+          ],
+          "samples_ts": [
+            43.5279,
+            43.5127,
+            43.51
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:48:00Z",
+          "avg_ns": 49021606135,
+          "stddev_ns": 13104402,
+          "avg_ts": 10.444375,
+          "stddev_ts": 0.002792,
+          "samples_ns": [
+            49006624660,
+            49027255155,
+            49030938590
+          ],
+          "samples_ts": [
+            10.4476,
+            10.4432,
+            10.4424
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 551
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:51:16.797024+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:50:28Z\",\n    \"avg_ns\": 2882791083,\n    \"stddev_ns\": 595875,\n    \"avg_ts\": 44.401415,\n    \"stddev_ts\": 0.009102,\n    \"samples_ns\": [ 2882447794, 2883473528, 2882451929 ],\n    \"samples_ts\": [ 44.4067, 44.3909, 44.4066 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:50:40Z\",\n    \"avg_ns\": 12164412369,\n    \"stddev_ns\": 30231942,\n    \"avg_ts\": 10.522541,\n    \"stddev_ts\": 0.026117,\n    \"samples_ns\": [ 12142821149, 12151452248, 12198963710 ],\n    \"samples_ts\": [ 10.5412, 10.5337, 10.4927 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:50:28Z",
+          "avg_ns": 2882791083,
+          "stddev_ns": 595875,
+          "avg_ts": 44.401415,
+          "stddev_ts": 0.009102,
+          "samples_ns": [
+            2882447794,
+            2883473528,
+            2882451929
+          ],
+          "samples_ts": [
+            44.4067,
+            44.3909,
+            44.4066
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:50:40Z",
+          "avg_ns": 12164412369,
+          "stddev_ns": 30231942,
+          "avg_ts": 10.522541,
+          "stddev_ts": 0.026117,
+          "samples_ns": [
+            12142821149,
+            12151452248,
+            12198963710
+          ],
+          "samples_ts": [
+            10.5412,
+            10.5337,
+            10.4927
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 552
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:53:56.493167+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:51:17Z\",\n    \"avg_ns\": 2882607152,\n    \"stddev_ns\": 644139,\n    \"avg_ts\": 44.404248,\n    \"stddev_ts\": 0.009923,\n    \"samples_ns\": [ 2883115062, 2881882627, 2882823767 ],\n    \"samples_ts\": [ 44.3964, 44.4154, 44.4009 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:51:29Z\",\n    \"avg_ns\": 49027581029,\n    \"stddev_ns\": 34037255,\n    \"avg_ts\": 10.443105,\n    \"stddev_ts\": 0.007251,\n    \"samples_ns\": [ 48990320946, 49035385116, 49057037027 ],\n    \"samples_ts\": [ 10.451, 10.4414, 10.4368 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:51:17Z",
+          "avg_ns": 2882607152,
+          "stddev_ns": 644139,
+          "avg_ts": 44.404248,
+          "stddev_ts": 0.009923,
+          "samples_ns": [
+            2883115062,
+            2881882627,
+            2882823767
+          ],
+          "samples_ts": [
+            44.3964,
+            44.4154,
+            44.4009
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:51:29Z",
+          "avg_ns": 49027581029,
+          "stddev_ns": 34037255,
+          "avg_ts": 10.443105,
+          "stddev_ts": 0.007251,
+          "samples_ns": [
+            48990320946,
+            49035385116,
+            49057037027
+          ],
+          "samples_ts": [
+            10.451,
+            10.4414,
+            10.4368
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 553
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:55:20.104304+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:53:57Z\",\n    \"avg_ns\": 11499547062,\n    \"stddev_ns\": 3666340,\n    \"avg_ts\": 44.523496,\n    \"stddev_ts\": 0.014185,\n    \"samples_ns\": [ 11502508960, 11495450884, 11500681344 ],\n    \"samples_ts\": [ 44.512, 44.5394, 44.5191 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:54:43Z\",\n    \"avg_ns\": 12175101908,\n    \"stddev_ns\": 17624388,\n    \"avg_ts\": 10.513274,\n    \"stddev_ts\": 0.015218,\n    \"samples_ns\": [ 12157825908, 12174424655, 12193055161 ],\n    \"samples_ts\": [ 10.5282, 10.5138, 10.4978 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:53:57Z",
+          "avg_ns": 11499547062,
+          "stddev_ns": 3666340,
+          "avg_ts": 44.523496,
+          "stddev_ts": 0.014185,
+          "samples_ns": [
+            11502508960,
+            11495450884,
+            11500681344
+          ],
+          "samples_ts": [
+            44.512,
+            44.5394,
+            44.5191
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:54:43Z",
+          "avg_ns": 12175101908,
+          "stddev_ns": 17624388,
+          "avg_ts": 10.513274,
+          "stddev_ts": 0.015218,
+          "samples_ns": [
+            12157825908,
+            12174424655,
+            12193055161
+          ],
+          "samples_ts": [
+            10.5282,
+            10.5138,
+            10.4978
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 554
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:58:34.675421+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:55:21Z\",\n    \"avg_ns\": 11563471050,\n    \"stddev_ns\": 58330331,\n    \"avg_ts\": 44.278111,\n    \"stddev_ts\": 0.222706,\n    \"samples_ns\": [ 11529807481, 11630824999, 11529780671 ],\n    \"samples_ts\": [ 44.4066, 44.021, 44.4067 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:56:07Z\",\n    \"avg_ns\": 49085587826,\n    \"stddev_ns\": 44411759,\n    \"avg_ts\": 10.430766,\n    \"stddev_ts\": 0.009433,\n    \"samples_ns\": [ 49051731624, 49135872688, 49069159167 ],\n    \"samples_ts\": [ 10.438, 10.4201, 10.4343 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:55:21Z",
+          "avg_ns": 11563471050,
+          "stddev_ns": 58330331,
+          "avg_ts": 44.278111,
+          "stddev_ts": 0.222706,
+          "samples_ns": [
+            11529807481,
+            11630824999,
+            11529780671
+          ],
+          "samples_ts": [
+            44.4066,
+            44.021,
+            44.4067
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:56:07Z",
+          "avg_ns": 49085587826,
+          "stddev_ns": 44411759,
+          "avg_ts": 10.430766,
+          "stddev_ts": 0.009433,
+          "samples_ns": [
+            49051731624,
+            49135872688,
+            49069159167
+          ],
+          "samples_ts": [
+            10.438,
+            10.4201,
+            10.4343
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 555
+    },
+    {
+      "timestamp_utc": "2025-12-09T05:59:23.715356+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:58:35Z\",\n    \"avg_ns\": 2919528161,\n    \"stddev_ns\": 66433274,\n    \"avg_ts\": 43.857644,\n    \"stddev_ts\": 0.985037,\n    \"samples_ns\": [ 2880537428, 2996235170, 2881811885 ],\n    \"samples_ts\": [ 44.4362, 42.7203, 44.4165 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:58:47Z\",\n    \"avg_ns\": 12098123472,\n    \"stddev_ns\": 39198365,\n    \"avg_ts\": 10.580227,\n    \"stddev_ts\": 0.034344,\n    \"samples_ns\": [ 12052898637, 12119143581, 12122328199 ],\n    \"samples_ts\": [ 10.6199, 10.5618, 10.559 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:58:35Z",
+          "avg_ns": 2919528161,
+          "stddev_ns": 66433274,
+          "avg_ts": 43.857644,
+          "stddev_ts": 0.985037,
+          "samples_ns": [
+            2880537428,
+            2996235170,
+            2881811885
+          ],
+          "samples_ts": [
+            44.4362,
+            42.7203,
+            44.4165
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:58:47Z",
+          "avg_ns": 12098123472,
+          "stddev_ns": 39198365,
+          "avg_ts": 10.580227,
+          "stddev_ts": 0.034344,
+          "samples_ns": [
+            12052898637,
+            12119143581,
+            12122328199
+          ],
+          "samples_ts": [
+            10.6199,
+            10.5618,
+            10.559
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 556
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:02:03.012464+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:59:24Z\",\n    \"avg_ns\": 2873592881,\n    \"stddev_ns\": 2133442,\n    \"avg_ts\": 44.543557,\n    \"stddev_ts\": 0.033057,\n    \"samples_ns\": [ 2875800141, 2871543363, 2873435140 ],\n    \"samples_ts\": [ 44.5094, 44.5753, 44.546 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T05:59:36Z\",\n    \"avg_ns\": 48908151255,\n    \"stddev_ns\": 16322905,\n    \"avg_ts\": 10.468603,\n    \"stddev_ts\": 0.003493,\n    \"samples_ns\": [ 48924694725, 48907694548, 48892064494 ],\n    \"samples_ts\": [ 10.4651, 10.4687, 10.472 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:59:24Z",
+          "avg_ns": 2873592881,
+          "stddev_ns": 2133442,
+          "avg_ts": 44.543557,
+          "stddev_ts": 0.033057,
+          "samples_ns": [
+            2875800141,
+            2871543363,
+            2873435140
+          ],
+          "samples_ts": [
+            44.5094,
+            44.5753,
+            44.546
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T05:59:36Z",
+          "avg_ns": 48908151255,
+          "stddev_ns": 16322905,
+          "avg_ts": 10.468603,
+          "stddev_ts": 0.003493,
+          "samples_ns": [
+            48924694725,
+            48907694548,
+            48892064494
+          ],
+          "samples_ts": [
+            10.4651,
+            10.4687,
+            10.472
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 557
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:03:26.976855+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:02:03Z\",\n    \"avg_ns\": 11579963761,\n    \"stddev_ns\": 4575463,\n    \"avg_ts\": 44.214305,\n    \"stddev_ts\": 0.017468,\n    \"samples_ns\": [ 11579326289, 11584824533, 11575740461 ],\n    \"samples_ts\": [ 44.2167, 44.1957, 44.2304 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:02:50Z\",\n    \"avg_ns\": 12177624731,\n    \"stddev_ns\": 12720486,\n    \"avg_ts\": 10.511089,\n    \"stddev_ts\": 0.010985,\n    \"samples_ns\": [ 12162963880, 12185717391, 12184192924 ],\n    \"samples_ts\": [ 10.5238, 10.5041, 10.5054 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:02:03Z",
+          "avg_ns": 11579963761,
+          "stddev_ns": 4575463,
+          "avg_ts": 44.214305,
+          "stddev_ts": 0.017468,
+          "samples_ns": [
+            11579326289,
+            11584824533,
+            11575740461
+          ],
+          "samples_ts": [
+            44.2167,
+            44.1957,
+            44.2304
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:02:50Z",
+          "avg_ns": 12177624731,
+          "stddev_ns": 12720486,
+          "avg_ts": 10.511089,
+          "stddev_ts": 0.010985,
+          "samples_ns": [
+            12162963880,
+            12185717391,
+            12184192924
+          ],
+          "samples_ts": [
+            10.5238,
+            10.5041,
+            10.5054
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 558
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:06:41.079231+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:03:27Z\",\n    \"avg_ns\": 11603472653,\n    \"stddev_ns\": 61354211,\n    \"avg_ts\": 44.125542,\n    \"stddev_ts\": 0.232607,\n    \"samples_ns\": [ 11674316838, 11567658461, 11568442661 ],\n    \"samples_ts\": [ 43.857, 44.2613, 44.2583 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:04:14Z\",\n    \"avg_ns\": 48882203081,\n    \"stddev_ns\": 68757218,\n    \"avg_ts\": 10.474173,\n    \"stddev_ts\": 0.014727,\n    \"samples_ns\": [ 48869435153, 48820725118, 48956448973 ],\n    \"samples_ts\": [ 10.4769, 10.4873, 10.4583 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:03:27Z",
+          "avg_ns": 11603472653,
+          "stddev_ns": 61354211,
+          "avg_ts": 44.125542,
+          "stddev_ts": 0.232607,
+          "samples_ns": [
+            11674316838,
+            11567658461,
+            11568442661
+          ],
+          "samples_ts": [
+            43.857,
+            44.2613,
+            44.2583
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:04:14Z",
+          "avg_ns": 48882203081,
+          "stddev_ns": 68757218,
+          "avg_ts": 10.474173,
+          "stddev_ts": 0.014727,
+          "samples_ns": [
+            48869435153,
+            48820725118,
+            48956448973
+          ],
+          "samples_ts": [
+            10.4769,
+            10.4873,
+            10.4583
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 559
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:07:29.928381+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:06:41Z\",\n    \"avg_ns\": 2878408762,\n    \"stddev_ns\": 2143859,\n    \"avg_ts\": 44.469031,\n    \"stddev_ts\": 0.033090,\n    \"samples_ns\": [ 2876691457, 2880809661, 2877725170 ],\n    \"samples_ts\": [ 44.4956, 44.432, 44.4796 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:06:53Z\",\n    \"avg_ns\": 12084120045,\n    \"stddev_ns\": 66472282,\n    \"avg_ts\": 10.592628,\n    \"stddev_ts\": 0.058416,\n    \"samples_ns\": [ 12009148432, 12135853912, 12107357791 ],\n    \"samples_ts\": [ 10.6585, 10.5473, 10.5721 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:06:41Z",
+          "avg_ns": 2878408762,
+          "stddev_ns": 2143859,
+          "avg_ts": 44.469031,
+          "stddev_ts": 0.03309,
+          "samples_ns": [
+            2876691457,
+            2880809661,
+            2877725170
+          ],
+          "samples_ts": [
+            44.4956,
+            44.432,
+            44.4796
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:06:53Z",
+          "avg_ns": 12084120045,
+          "stddev_ns": 66472282,
+          "avg_ts": 10.592628,
+          "stddev_ts": 0.058416,
+          "samples_ns": [
+            12009148432,
+            12135853912,
+            12107357791
+          ],
+          "samples_ts": [
+            10.6585,
+            10.5473,
+            10.5721
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 560
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:10:08.572095+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:07:30Z\",\n    \"avg_ns\": 2880935926,\n    \"stddev_ns\": 1802024,\n    \"avg_ts\": 44.430017,\n    \"stddev_ts\": 0.027757,\n    \"samples_ns\": [ 2879758916, 2883008519, 2880040345 ],\n    \"samples_ts\": [ 44.4482, 44.3981, 44.4438 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:07:42Z\",\n    \"avg_ns\": 48662230236,\n    \"stddev_ns\": 565559318,\n    \"avg_ts\": 10.522461,\n    \"stddev_ts\": 0.123114,\n    \"samples_ns\": [ 49010171994, 48966861169, 48009657546 ],\n    \"samples_ts\": [ 10.4468, 10.4561, 10.6645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:07:30Z",
+          "avg_ns": 2880935926,
+          "stddev_ns": 1802024,
+          "avg_ts": 44.430017,
+          "stddev_ts": 0.027757,
+          "samples_ns": [
+            2879758916,
+            2883008519,
+            2880040345
+          ],
+          "samples_ts": [
+            44.4482,
+            44.3981,
+            44.4438
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:07:42Z",
+          "avg_ns": 48662230236,
+          "stddev_ns": 565559318,
+          "avg_ts": 10.522461,
+          "stddev_ts": 0.123114,
+          "samples_ns": [
+            49010171994,
+            48966861169,
+            48009657546
+          ],
+          "samples_ts": [
+            10.4468,
+            10.4561,
+            10.6645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 561
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:11:33.172649+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:10:09Z\",\n    \"avg_ns\": 11738500157,\n    \"stddev_ns\": 3027254,\n    \"avg_ts\": 43.617159,\n    \"stddev_ts\": 0.011249,\n    \"samples_ns\": [ 11735270198, 11738957643, 11741272630 ],\n    \"samples_ts\": [ 43.6292, 43.6155, 43.6069 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:10:56Z\",\n    \"avg_ns\": 12175398358,\n    \"stddev_ns\": 26968178,\n    \"avg_ts\": 10.513037,\n    \"stddev_ts\": 0.023298,\n    \"samples_ns\": [ 12146569064, 12200006735, 12179619277 ],\n    \"samples_ts\": [ 10.538, 10.4918, 10.5094 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:10:09Z",
+          "avg_ns": 11738500157,
+          "stddev_ns": 3027254,
+          "avg_ts": 43.617159,
+          "stddev_ts": 0.011249,
+          "samples_ns": [
+            11735270198,
+            11738957643,
+            11741272630
+          ],
+          "samples_ts": [
+            43.6292,
+            43.6155,
+            43.6069
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:10:56Z",
+          "avg_ns": 12175398358,
+          "stddev_ns": 26968178,
+          "avg_ts": 10.513037,
+          "stddev_ts": 0.023298,
+          "samples_ns": [
+            12146569064,
+            12200006735,
+            12179619277
+          ],
+          "samples_ts": [
+            10.538,
+            10.4918,
+            10.5094
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 562
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:14:48.409003+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:11:34Z\",\n    \"avg_ns\": 11746512377,\n    \"stddev_ns\": 4840018,\n    \"avg_ts\": 43.587411,\n    \"stddev_ts\": 0.017959,\n    \"samples_ns\": [ 11749974967, 11748578698, 11740983467 ],\n    \"samples_ts\": [ 43.5746, 43.5797, 43.6079 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:12:21Z\",\n    \"avg_ns\": 49054479235,\n    \"stddev_ns\": 50063627,\n    \"avg_ts\": 10.437382,\n    \"stddev_ts\": 0.010658,\n    \"samples_ns\": [ 48996684407, 49082310747, 49084442552 ],\n    \"samples_ts\": [ 10.4497, 10.4315, 10.431 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:11:34Z",
+          "avg_ns": 11746512377,
+          "stddev_ns": 4840018,
+          "avg_ts": 43.587411,
+          "stddev_ts": 0.017959,
+          "samples_ns": [
+            11749974967,
+            11748578698,
+            11740983467
+          ],
+          "samples_ts": [
+            43.5746,
+            43.5797,
+            43.6079
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:12:21Z",
+          "avg_ns": 49054479235,
+          "stddev_ns": 50063627,
+          "avg_ts": 10.437382,
+          "stddev_ts": 0.010658,
+          "samples_ns": [
+            48996684407,
+            49082310747,
+            49084442552
+          ],
+          "samples_ts": [
+            10.4497,
+            10.4315,
+            10.431
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 563
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:15:37.421177+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:14:49Z\",\n    \"avg_ns\": 2875780043,\n    \"stddev_ns\": 1150358,\n    \"avg_ts\": 44.509668,\n    \"stddev_ts\": 0.017781,\n    \"samples_ns\": [ 2875023259, 2877102314, 2875214557 ],\n    \"samples_ts\": [ 44.5214, 44.4892, 44.5184 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:15:00Z\",\n    \"avg_ns\": 12127396473,\n    \"stddev_ns\": 40612072,\n    \"avg_ts\": 10.554694,\n    \"stddev_ts\": 0.035378,\n    \"samples_ns\": [ 12083485853, 12163606930, 12135096638 ],\n    \"samples_ts\": [ 10.593, 10.5232, 10.5479 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:14:49Z",
+          "avg_ns": 2875780043,
+          "stddev_ns": 1150358,
+          "avg_ts": 44.509668,
+          "stddev_ts": 0.017781,
+          "samples_ns": [
+            2875023259,
+            2877102314,
+            2875214557
+          ],
+          "samples_ts": [
+            44.5214,
+            44.4892,
+            44.5184
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:15:00Z",
+          "avg_ns": 12127396473,
+          "stddev_ns": 40612072,
+          "avg_ts": 10.554694,
+          "stddev_ts": 0.035378,
+          "samples_ns": [
+            12083485853,
+            12163606930,
+            12135096638
+          ],
+          "samples_ts": [
+            10.593,
+            10.5232,
+            10.5479
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 564
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:18:17.036303+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:15:38Z\",\n    \"avg_ns\": 2885917384,\n    \"stddev_ns\": 5242436,\n    \"avg_ts\": 44.353412,\n    \"stddev_ts\": 0.080637,\n    \"samples_ns\": [ 2887718542, 2880011806, 2890021804 ],\n    \"samples_ts\": [ 44.3256, 44.4443, 44.2903 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:15:49Z\",\n    \"avg_ns\": 48994479363,\n    \"stddev_ns\": 88374234,\n    \"avg_ts\": 10.450180,\n    \"stddev_ts\": 0.018868,\n    \"samples_ns\": [ 48893149190, 49055587354, 49034701547 ],\n    \"samples_ts\": [ 10.4718, 10.4371, 10.4416 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:15:38Z",
+          "avg_ns": 2885917384,
+          "stddev_ns": 5242436,
+          "avg_ts": 44.353412,
+          "stddev_ts": 0.080637,
+          "samples_ns": [
+            2887718542,
+            2880011806,
+            2890021804
+          ],
+          "samples_ts": [
+            44.3256,
+            44.4443,
+            44.2903
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:15:49Z",
+          "avg_ns": 48994479363,
+          "stddev_ns": 88374234,
+          "avg_ts": 10.45018,
+          "stddev_ts": 0.018868,
+          "samples_ns": [
+            48893149190,
+            49055587354,
+            49034701547
+          ],
+          "samples_ts": [
+            10.4718,
+            10.4371,
+            10.4416
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 565
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:19:40.720273+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:18:17Z\",\n    \"avg_ns\": 11514193148,\n    \"stddev_ns\": 5817262,\n    \"avg_ts\": 44.466866,\n    \"stddev_ts\": 0.022472,\n    \"samples_ns\": [ 11518474457, 11507569934, 11516535053 ],\n    \"samples_ts\": [ 44.4503, 44.4925, 44.4578 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:19:04Z\",\n    \"avg_ns\": 12179677624,\n    \"stddev_ns\": 26053213,\n    \"avg_ts\": 10.509341,\n    \"stddev_ts\": 0.022471,\n    \"samples_ns\": [ 12155401132, 12207202841, 12176428899 ],\n    \"samples_ts\": [ 10.5303, 10.4856, 10.5121 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:18:17Z",
+          "avg_ns": 11514193148,
+          "stddev_ns": 5817262,
+          "avg_ts": 44.466866,
+          "stddev_ts": 0.022472,
+          "samples_ns": [
+            11518474457,
+            11507569934,
+            11516535053
+          ],
+          "samples_ts": [
+            44.4503,
+            44.4925,
+            44.4578
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:19:04Z",
+          "avg_ns": 12179677624,
+          "stddev_ns": 26053213,
+          "avg_ts": 10.509341,
+          "stddev_ts": 0.022471,
+          "samples_ns": [
+            12155401132,
+            12207202841,
+            12176428899
+          ],
+          "samples_ts": [
+            10.5303,
+            10.4856,
+            10.5121
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 566
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:22:54.683687+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:19:41Z\",\n    \"avg_ns\": 11518750574,\n    \"stddev_ns\": 2457517,\n    \"avg_ts\": 44.449267,\n    \"stddev_ts\": 0.009465,\n    \"samples_ns\": [ 11518543226, 11521300493, 11516408005 ],\n    \"samples_ts\": [ 44.4501, 44.4394, 44.4583 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:20:27Z\",\n    \"avg_ns\": 48913943995,\n    \"stddev_ns\": 18014583,\n    \"avg_ts\": 10.467364,\n    \"stddev_ts\": 0.003854,\n    \"samples_ns\": [ 48897860622, 48910563330, 48933408034 ],\n    \"samples_ts\": [ 10.4708, 10.4681, 10.4632 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:19:41Z",
+          "avg_ns": 11518750574,
+          "stddev_ns": 2457517,
+          "avg_ts": 44.449267,
+          "stddev_ts": 0.009465,
+          "samples_ns": [
+            11518543226,
+            11521300493,
+            11516408005
+          ],
+          "samples_ts": [
+            44.4501,
+            44.4394,
+            44.4583
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:20:27Z",
+          "avg_ns": 48913943995,
+          "stddev_ns": 18014583,
+          "avg_ts": 10.467364,
+          "stddev_ts": 0.003854,
+          "samples_ns": [
+            48897860622,
+            48910563330,
+            48933408034
+          ],
+          "samples_ts": [
+            10.4708,
+            10.4681,
+            10.4632
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 567
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:23:43.685742+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:22:55Z\",\n    \"avg_ns\": 2877884219,\n    \"stddev_ns\": 1972605,\n    \"avg_ts\": 44.477133,\n    \"stddev_ts\": 0.030474,\n    \"samples_ns\": [ 2880160227, 2876823771, 2876668659 ],\n    \"samples_ts\": [ 44.442, 44.4935, 44.4959 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:23:07Z\",\n    \"avg_ns\": 12127546053,\n    \"stddev_ns\": 39457355,\n    \"avg_ts\": 10.554559,\n    \"stddev_ts\": 0.034380,\n    \"samples_ns\": [ 12083993807, 12137734590, 12160909763 ],\n    \"samples_ts\": [ 10.5925, 10.5456, 10.5255 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:22:55Z",
+          "avg_ns": 2877884219,
+          "stddev_ns": 1972605,
+          "avg_ts": 44.477133,
+          "stddev_ts": 0.030474,
+          "samples_ns": [
+            2880160227,
+            2876823771,
+            2876668659
+          ],
+          "samples_ts": [
+            44.442,
+            44.4935,
+            44.4959
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:23:07Z",
+          "avg_ns": 12127546053,
+          "stddev_ns": 39457355,
+          "avg_ts": 10.554559,
+          "stddev_ts": 0.03438,
+          "samples_ns": [
+            12083993807,
+            12137734590,
+            12160909763
+          ],
+          "samples_ts": [
+            10.5925,
+            10.5456,
+            10.5255
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 568
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:26:23.160413+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:23:44Z\",\n    \"avg_ns\": 2877067095,\n    \"stddev_ns\": 1865936,\n    \"avg_ts\": 44.489764,\n    \"stddev_ts\": 0.028839,\n    \"samples_ns\": [ 2877696503, 2878535325, 2874969459 ],\n    \"samples_ts\": [ 44.48, 44.4671, 44.5222 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:23:56Z\",\n    \"avg_ns\": 48944431215,\n    \"stddev_ns\": 73192271,\n    \"avg_ts\": 10.460858,\n    \"stddev_ts\": 0.015640,\n    \"samples_ns\": [ 48937289195, 49020932017, 48875072435 ],\n    \"samples_ts\": [ 10.4624, 10.4445, 10.4757 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:23:44Z",
+          "avg_ns": 2877067095,
+          "stddev_ns": 1865936,
+          "avg_ts": 44.489764,
+          "stddev_ts": 0.028839,
+          "samples_ns": [
+            2877696503,
+            2878535325,
+            2874969459
+          ],
+          "samples_ts": [
+            44.48,
+            44.4671,
+            44.5222
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:23:56Z",
+          "avg_ns": 48944431215,
+          "stddev_ns": 73192271,
+          "avg_ts": 10.460858,
+          "stddev_ts": 0.01564,
+          "samples_ns": [
+            48937289195,
+            49020932017,
+            48875072435
+          ],
+          "samples_ts": [
+            10.4624,
+            10.4445,
+            10.4757
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 569
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:27:47.155605+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:26:24Z\",\n    \"avg_ns\": 11585624040,\n    \"stddev_ns\": 8059680,\n    \"avg_ts\": 44.192714,\n    \"stddev_ts\": 0.030733,\n    \"samples_ns\": [ 11582512462, 11594775684, 11579583974 ],\n    \"samples_ts\": [ 44.2046, 44.1578, 44.2158 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:27:10Z\",\n    \"avg_ns\": 12188035953,\n    \"stddev_ns\": 42954239,\n    \"avg_ts\": 10.502189,\n    \"stddev_ts\": 0.037087,\n    \"samples_ns\": [ 12138525333, 12215356101, 12210226426 ],\n    \"samples_ts\": [ 10.5449, 10.4786, 10.483 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:26:24Z",
+          "avg_ns": 11585624040,
+          "stddev_ns": 8059680,
+          "avg_ts": 44.192714,
+          "stddev_ts": 0.030733,
+          "samples_ns": [
+            11582512462,
+            11594775684,
+            11579583974
+          ],
+          "samples_ts": [
+            44.2046,
+            44.1578,
+            44.2158
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:27:10Z",
+          "avg_ns": 12188035953,
+          "stddev_ns": 42954239,
+          "avg_ts": 10.502189,
+          "stddev_ts": 0.037087,
+          "samples_ns": [
+            12138525333,
+            12215356101,
+            12210226426
+          ],
+          "samples_ts": [
+            10.5449,
+            10.4786,
+            10.483
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 570
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:31:01.458980+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:27:48Z\",\n    \"avg_ns\": 11581577316,\n    \"stddev_ns\": 4383068,\n    \"avg_ts\": 44.208145,\n    \"stddev_ts\": 0.016732,\n    \"samples_ns\": [ 11585464140, 11582441170, 11576826638 ],\n    \"samples_ts\": [ 44.1933, 44.2048, 44.2263 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:28:34Z\",\n    \"avg_ns\": 48958937627,\n    \"stddev_ns\": 33614714,\n    \"avg_ts\": 10.457747,\n    \"stddev_ts\": 0.007181,\n    \"samples_ns\": [ 48922202801, 48966453060, 48988157022 ],\n    \"samples_ts\": [ 10.4656, 10.4561, 10.4515 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:27:48Z",
+          "avg_ns": 11581577316,
+          "stddev_ns": 4383068,
+          "avg_ts": 44.208145,
+          "stddev_ts": 0.016732,
+          "samples_ns": [
+            11585464140,
+            11582441170,
+            11576826638
+          ],
+          "samples_ts": [
+            44.1933,
+            44.2048,
+            44.2263
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:28:34Z",
+          "avg_ns": 48958937627,
+          "stddev_ns": 33614714,
+          "avg_ts": 10.457747,
+          "stddev_ts": 0.007181,
+          "samples_ns": [
+            48922202801,
+            48966453060,
+            48988157022
+          ],
+          "samples_ts": [
+            10.4656,
+            10.4561,
+            10.4515
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 571
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:31:50.720109+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:31:02Z\",\n    \"avg_ns\": 2877064526,\n    \"stddev_ns\": 1018885,\n    \"avg_ts\": 44.489795,\n    \"stddev_ts\": 0.015732,\n    \"samples_ns\": [ 2878174845, 2876176734, 2876842000 ],\n    \"samples_ts\": [ 44.4726, 44.5035, 44.4932 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:31:13Z\",\n    \"avg_ns\": 12193073767,\n    \"stddev_ns\": 33126168,\n    \"avg_ts\": 10.497815,\n    \"stddev_ts\": 0.028565,\n    \"samples_ns\": [ 12154828193, 12212744365, 12211648743 ],\n    \"samples_ts\": [ 10.5308, 10.4809, 10.4818 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:31:02Z",
+          "avg_ns": 2877064526,
+          "stddev_ns": 1018885,
+          "avg_ts": 44.489795,
+          "stddev_ts": 0.015732,
+          "samples_ns": [
+            2878174845,
+            2876176734,
+            2876842000
+          ],
+          "samples_ts": [
+            44.4726,
+            44.5035,
+            44.4932
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:31:13Z",
+          "avg_ns": 12193073767,
+          "stddev_ns": 33126168,
+          "avg_ts": 10.497815,
+          "stddev_ts": 0.028565,
+          "samples_ns": [
+            12154828193,
+            12212744365,
+            12211648743
+          ],
+          "samples_ts": [
+            10.5308,
+            10.4809,
+            10.4818
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 572
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:34:30.276082+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:31:51Z\",\n    \"avg_ns\": 2882287369,\n    \"stddev_ns\": 1007724,\n    \"avg_ts\": 44.409177,\n    \"stddev_ts\": 0.015483,\n    \"samples_ns\": [ 2881233706, 2882393383, 2883235020 ],\n    \"samples_ts\": [ 44.4254, 44.4075, 44.3946 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:32:03Z\",\n    \"avg_ns\": 48967724138,\n    \"stddev_ns\": 76337120,\n    \"avg_ts\": 10.455884,\n    \"stddev_ts\": 0.016303,\n    \"samples_ns\": [ 48974661839, 49040354955, 48888155622 ],\n    \"samples_ts\": [ 10.4544, 10.4404, 10.4729 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:31:51Z",
+          "avg_ns": 2882287369,
+          "stddev_ns": 1007724,
+          "avg_ts": 44.409177,
+          "stddev_ts": 0.015483,
+          "samples_ns": [
+            2881233706,
+            2882393383,
+            2883235020
+          ],
+          "samples_ts": [
+            44.4254,
+            44.4075,
+            44.3946
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:32:03Z",
+          "avg_ns": 48967724138,
+          "stddev_ns": 76337120,
+          "avg_ts": 10.455884,
+          "stddev_ts": 0.016303,
+          "samples_ns": [
+            48974661839,
+            49040354955,
+            48888155622
+          ],
+          "samples_ts": [
+            10.4544,
+            10.4404,
+            10.4729
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 573
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:35:54.952307+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:34:31Z\",\n    \"avg_ns\": 11792870999,\n    \"stddev_ns\": 1904848,\n    \"avg_ts\": 43.416061,\n    \"stddev_ts\": 0.007001,\n    \"samples_ns\": [ 11794870542, 11791085082, 11792657374 ],\n    \"samples_ts\": [ 43.4087, 43.4226, 43.4168 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:35:18Z\",\n    \"avg_ns\": 12125621009,\n    \"stddev_ns\": 26930761,\n    \"avg_ts\": 10.556195,\n    \"stddev_ts\": 0.023464,\n    \"samples_ns\": [ 12095884963, 12132609980, 12148368085 ],\n    \"samples_ts\": [ 10.5821, 10.5501, 10.5364 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:34:31Z",
+          "avg_ns": 11792870999,
+          "stddev_ns": 1904848,
+          "avg_ts": 43.416061,
+          "stddev_ts": 0.007001,
+          "samples_ns": [
+            11794870542,
+            11791085082,
+            11792657374
+          ],
+          "samples_ts": [
+            43.4087,
+            43.4226,
+            43.4168
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:35:18Z",
+          "avg_ns": 12125621009,
+          "stddev_ns": 26930761,
+          "avg_ts": 10.556195,
+          "stddev_ts": 0.023464,
+          "samples_ns": [
+            12095884963,
+            12132609980,
+            12148368085
+          ],
+          "samples_ts": [
+            10.5821,
+            10.5501,
+            10.5364
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 574
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:39:10.229957+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:35:55Z\",\n    \"avg_ns\": 11768598565,\n    \"stddev_ns\": 2800350,\n    \"avg_ts\": 43.505607,\n    \"stddev_ts\": 0.010336,\n    \"samples_ns\": [ 11771513221, 11765938320, 11768344156 ],\n    \"samples_ts\": [ 43.4948, 43.5154, 43.5065 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 1B Q4_K - Medium\",\n    \"model_size\": 799525120,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:36:43Z\",\n    \"avg_ns\": 49014013425,\n    \"stddev_ns\": 96888258,\n    \"avg_ts\": 10.446019,\n    \"stddev_ts\": 0.020668,\n    \"samples_ns\": [ 49088837984, 49048630652, 48904571640 ],\n    \"samples_ts\": [ 10.4301, 10.4386, 10.4694 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:35:55Z",
+          "avg_ns": 11768598565,
+          "stddev_ns": 2800350,
+          "avg_ts": 43.505607,
+          "stddev_ts": 0.010336,
+          "samples_ns": [
+            11771513221,
+            11765938320,
+            11768344156
+          ],
+          "samples_ts": [
+            43.4948,
+            43.5154,
+            43.5065
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 1B Q4_K - Medium",
+          "model_size": 799525120,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:36:43Z",
+          "avg_ns": 49014013425,
+          "stddev_ns": 96888258,
+          "avg_ts": 10.446019,
+          "stddev_ts": 0.020668,
+          "samples_ns": [
+            49088837984,
+            49048630652,
+            48904571640
+          ],
+          "samples_ts": [
+            10.4301,
+            10.4386,
+            10.4694
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 575
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:40:33.741705+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:39:29Z\",\n    \"avg_ns\": 6394042724,\n    \"stddev_ns\": 13442255,\n    \"avg_ts\": 20.018693,\n    \"stddev_ts\": 0.042035,\n    \"samples_ns\": [ 6386108381, 6386456589, 6409563202 ],\n    \"samples_ts\": [ 20.0435, 20.0424, 19.9702 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:39:55Z\",\n    \"avg_ns\": 12799794660,\n    \"stddev_ns\": 19627462,\n    \"avg_ts\": 10.000176,\n    \"stddev_ts\": 0.015321,\n    \"samples_ns\": [ 12822410297, 12787207385, 12789766298 ],\n    \"samples_ts\": [ 9.98252, 10.01, 10.008 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:39:29Z",
+          "avg_ns": 6394042724,
+          "stddev_ns": 13442255,
+          "avg_ts": 20.018693,
+          "stddev_ts": 0.042035,
+          "samples_ns": [
+            6386108381,
+            6386456589,
+            6409563202
+          ],
+          "samples_ts": [
+            20.0435,
+            20.0424,
+            19.9702
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:39:55Z",
+          "avg_ns": 12799794660,
+          "stddev_ns": 19627462,
+          "avg_ts": 10.000176,
+          "stddev_ts": 0.015321,
+          "samples_ns": [
+            12822410297,
+            12787207385,
+            12789766298
+          ],
+          "samples_ts": [
+            9.98252,
+            10.01,
+            10.008
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 576
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:43:33.478989+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:40:34Z\",\n    \"avg_ns\": 6373681013,\n    \"stddev_ns\": 362605,\n    \"avg_ts\": 20.082586,\n    \"stddev_ts\": 0.001115,\n    \"samples_ns\": [ 6373753151, 6373296798, 6373993091 ],\n    \"samples_ts\": [ 20.0824, 20.0838, 20.0816 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:41:00Z\",\n    \"avg_ns\": 51023041729,\n    \"stddev_ns\": 2480918,\n    \"avg_ts\": 10.034682,\n    \"stddev_ts\": 0.000484,\n    \"samples_ns\": [ 51025880408, 51021525758, 51021719023 ],\n    \"samples_ts\": [ 10.0341, 10.035, 10.0349 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:40:34Z",
+          "avg_ns": 6373681013,
+          "stddev_ns": 362605,
+          "avg_ts": 20.082586,
+          "stddev_ts": 0.001115,
+          "samples_ns": [
+            6373753151,
+            6373296798,
+            6373993091
+          ],
+          "samples_ts": [
+            20.0824,
+            20.0838,
+            20.0816
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:41:00Z",
+          "avg_ns": 51023041729,
+          "stddev_ns": 2480918,
+          "avg_ts": 10.034682,
+          "stddev_ts": 0.000484,
+          "samples_ns": [
+            51025880408,
+            51021525758,
+            51021719023
+          ],
+          "samples_ts": [
+            10.0341,
+            10.035,
+            10.0349
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 577
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:45:55.797879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:43:34Z\",\n    \"avg_ns\": 25721361217,\n    \"stddev_ns\": 1163905,\n    \"avg_ts\": 19.905634,\n    \"stddev_ts\": 0.000901,\n    \"samples_ns\": [ 25722575401, 25720255132, 25721253118 ],\n    \"samples_ts\": [ 19.9047, 19.9065, 19.9057 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:45:17Z\",\n    \"avg_ns\": 12767882918,\n    \"stddev_ns\": 4411754,\n    \"avg_ts\": 10.025155,\n    \"stddev_ts\": 0.003461,\n    \"samples_ns\": [ 12772647713, 12767053130, 12763947913 ],\n    \"samples_ts\": [ 10.0214, 10.0258, 10.0282 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:43:34Z",
+          "avg_ns": 25721361217,
+          "stddev_ns": 1163905,
+          "avg_ts": 19.905634,
+          "stddev_ts": 0.000901,
+          "samples_ns": [
+            25722575401,
+            25720255132,
+            25721253118
+          ],
+          "samples_ts": [
+            19.9047,
+            19.9065,
+            19.9057
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:45:17Z",
+          "avg_ns": 12767882918,
+          "stddev_ns": 4411754,
+          "avg_ts": 10.025155,
+          "stddev_ts": 0.003461,
+          "samples_ns": [
+            12772647713,
+            12767053130,
+            12763947913
+          ],
+          "samples_ts": [
+            10.0214,
+            10.0258,
+            10.0282
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 578
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:50:14.578802+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:45:56Z\",\n    \"avg_ns\": 25719331392,\n    \"stddev_ns\": 423839,\n    \"avg_ts\": 19.907205,\n    \"stddev_ts\": 0.000304,\n    \"samples_ns\": [ 25719678247, 25719410332, 25718905598 ],\n    \"samples_ts\": [ 19.9069, 19.9071, 19.9075 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:47:39Z\",\n    \"avg_ns\": 51590153543,\n    \"stddev_ns\": 3232020,\n    \"avg_ts\": 9.924374,\n    \"stddev_ts\": 0.000620,\n    \"samples_ns\": [ 51587430122, 51589317164, 51593713344 ],\n    \"samples_ts\": [ 9.9249, 9.92454, 9.92369 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:45:56Z",
+          "avg_ns": 25719331392,
+          "stddev_ns": 423839,
+          "avg_ts": 19.907205,
+          "stddev_ts": 0.000304,
+          "samples_ns": [
+            25719678247,
+            25719410332,
+            25718905598
+          ],
+          "samples_ts": [
+            19.9069,
+            19.9071,
+            19.9075
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:47:39Z",
+          "avg_ns": 51590153543,
+          "stddev_ns": 3232020,
+          "avg_ts": 9.924374,
+          "stddev_ts": 0.00062,
+          "samples_ns": [
+            51587430122,
+            51589317164,
+            51593713344
+          ],
+          "samples_ts": [
+            9.9249,
+            9.92454,
+            9.92369
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 579
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:51:19.416465+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:50:15Z\",\n    \"avg_ns\": 6377017544,\n    \"stddev_ns\": 255656,\n    \"avg_ts\": 20.072079,\n    \"stddev_ts\": 0.000805,\n    \"samples_ns\": [ 6377247439, 6377062976, 6376742217 ],\n    \"samples_ts\": [ 20.0714, 20.0719, 20.0729 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:50:41Z\",\n    \"avg_ns\": 12714177137,\n    \"stddev_ns\": 912764,\n    \"avg_ts\": 10.067502,\n    \"stddev_ts\": 0.000723,\n    \"samples_ns\": [ 12714040163, 12715150648, 12713340600 ],\n    \"samples_ts\": [ 10.0676, 10.0667, 10.0682 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:50:15Z",
+          "avg_ns": 6377017544,
+          "stddev_ns": 255656,
+          "avg_ts": 20.072079,
+          "stddev_ts": 0.000805,
+          "samples_ns": [
+            6377247439,
+            6377062976,
+            6376742217
+          ],
+          "samples_ts": [
+            20.0714,
+            20.0719,
+            20.0729
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:50:41Z",
+          "avg_ns": 12714177137,
+          "stddev_ns": 912764,
+          "avg_ts": 10.067502,
+          "stddev_ts": 0.000723,
+          "samples_ns": [
+            12714040163,
+            12715150648,
+            12713340600
+          ],
+          "samples_ts": [
+            10.0676,
+            10.0667,
+            10.0682
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 580
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:54:19.561226+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:51:20Z\",\n    \"avg_ns\": 6377243380,\n    \"stddev_ns\": 139480,\n    \"avg_ts\": 20.071368,\n    \"stddev_ts\": 0.000258,\n    \"samples_ns\": [ 6377169980, 6377331652, 6377228510 ],\n    \"samples_ts\": [ 20.0716, 20.0711, 20.0714 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:51:45Z\",\n    \"avg_ns\": 51183579927,\n    \"stddev_ns\": 847628,\n    \"avg_ts\": 10.003208,\n    \"stddev_ts\": 0.000153,\n    \"samples_ns\": [ 51182760615, 51184325220, 51183653948 ],\n    \"samples_ts\": [ 10.0034, 10.0031, 10.0032 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:51:20Z",
+          "avg_ns": 6377243380,
+          "stddev_ns": 139480,
+          "avg_ts": 20.071368,
+          "stddev_ts": 0.000258,
+          "samples_ns": [
+            6377169980,
+            6377331652,
+            6377228510
+          ],
+          "samples_ts": [
+            20.0716,
+            20.0711,
+            20.0714
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:51:45Z",
+          "avg_ns": 51183579927,
+          "stddev_ns": 847628,
+          "avg_ts": 10.003208,
+          "stddev_ts": 0.000153,
+          "samples_ns": [
+            51182760615,
+            51184325220,
+            51183653948
+          ],
+          "samples_ts": [
+            10.0034,
+            10.0031,
+            10.0032
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 581
+    },
+    {
+      "timestamp_utc": "2025-12-09T06:56:41.237532+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:54:20Z\",\n    \"avg_ns\": 25654252160,\n    \"stddev_ns\": 6960154,\n    \"avg_ts\": 19.957706,\n    \"stddev_ts\": 0.005414,\n    \"samples_ns\": [ 25662222574, 25651160330, 25649373576 ],\n    \"samples_ts\": [ 19.9515, 19.9601, 19.9615 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:56:03Z\",\n    \"avg_ns\": 12659836215,\n    \"stddev_ns\": 2875563,\n    \"avg_ts\": 10.110716,\n    \"stddev_ts\": 0.002293,\n    \"samples_ns\": [ 12660772553, 12662122316, 12656613778 ],\n    \"samples_ts\": [ 10.11, 10.1089, 10.1133 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:54:20Z",
+          "avg_ns": 25654252160,
+          "stddev_ns": 6960154,
+          "avg_ts": 19.957706,
+          "stddev_ts": 0.005414,
+          "samples_ns": [
+            25662222574,
+            25651160330,
+            25649373576
+          ],
+          "samples_ts": [
+            19.9515,
+            19.9601,
+            19.9615
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:56:03Z",
+          "avg_ns": 12659836215,
+          "stddev_ns": 2875563,
+          "avg_ts": 10.110716,
+          "stddev_ts": 0.002293,
+          "samples_ns": [
+            12660772553,
+            12662122316,
+            12656613778
+          ],
+          "samples_ts": [
+            10.11,
+            10.1089,
+            10.1133
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 582
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:00:57.886307+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:56:42Z\",\n    \"avg_ns\": 25634100719,\n    \"stddev_ns\": 207425,\n    \"avg_ts\": 19.973394,\n    \"stddev_ts\": 0.000162,\n    \"samples_ns\": [ 25634314987, 25634086280, 25633900890 ],\n    \"samples_ts\": [ 19.9732, 19.9734, 19.9735 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T06:58:24Z\",\n    \"avg_ns\": 51009786335,\n    \"stddev_ns\": 377269,\n    \"avg_ts\": 10.037290,\n    \"stddev_ts\": 0.000059,\n    \"samples_ns\": [ 51009978177, 51009437986, 51009942843 ],\n    \"samples_ts\": [ 10.0373, 10.0374, 10.0373 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:56:42Z",
+          "avg_ns": 25634100719,
+          "stddev_ns": 207425,
+          "avg_ts": 19.973394,
+          "stddev_ts": 0.000162,
+          "samples_ns": [
+            25634314987,
+            25634086280,
+            25633900890
+          ],
+          "samples_ts": [
+            19.9732,
+            19.9734,
+            19.9735
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T06:58:24Z",
+          "avg_ns": 51009786335,
+          "stddev_ns": 377269,
+          "avg_ts": 10.03729,
+          "stddev_ts": 5.9e-05,
+          "samples_ns": [
+            51009978177,
+            51009437986,
+            51009942843
+          ],
+          "samples_ts": [
+            10.0373,
+            10.0374,
+            10.0373
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 583
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:02:02.588205+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:00:58Z\",\n    \"avg_ns\": 6376473432,\n    \"stddev_ns\": 167375,\n    \"avg_ts\": 20.073792,\n    \"stddev_ts\": 0.000463,\n    \"samples_ns\": [ 6376512977, 6376310603, 6376596717 ],\n    \"samples_ts\": [ 20.0737, 20.0743, 20.0734 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:01:24Z\",\n    \"avg_ns\": 12697303110,\n    \"stddev_ns\": 905933,\n    \"avg_ts\": 10.080881,\n    \"stddev_ts\": 0.000708,\n    \"samples_ns\": [ 12696972219, 12696624043, 12698313070 ],\n    \"samples_ts\": [ 10.0811, 10.0814, 10.0801 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:00:58Z",
+          "avg_ns": 6376473432,
+          "stddev_ns": 167375,
+          "avg_ts": 20.073792,
+          "stddev_ts": 0.000463,
+          "samples_ns": [
+            6376512977,
+            6376310603,
+            6376596717
+          ],
+          "samples_ts": [
+            20.0737,
+            20.0743,
+            20.0734
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:01:24Z",
+          "avg_ns": 12697303110,
+          "stddev_ns": 905933,
+          "avg_ts": 10.080881,
+          "stddev_ts": 0.000708,
+          "samples_ns": [
+            12696972219,
+            12696624043,
+            12698313070
+          ],
+          "samples_ts": [
+            10.0811,
+            10.0814,
+            10.0801
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 584
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:05:02.792579+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:02:03Z\",\n    \"avg_ns\": 6378564733,\n    \"stddev_ns\": 293449,\n    \"avg_ts\": 20.067210,\n    \"stddev_ts\": 0.000852,\n    \"samples_ns\": [ 6378876882, 6378391978, 6378425341 ],\n    \"samples_ts\": [ 20.0662, 20.0678, 20.0676 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:02:28Z\",\n    \"avg_ns\": 51205073208,\n    \"stddev_ns\": 1205304,\n    \"avg_ts\": 9.999009,\n    \"stddev_ts\": 0.000235,\n    \"samples_ns\": [ 51204168413, 51204609762, 51206441449 ],\n    \"samples_ts\": [ 9.99919, 9.9991, 9.99874 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:02:03Z",
+          "avg_ns": 6378564733,
+          "stddev_ns": 293449,
+          "avg_ts": 20.06721,
+          "stddev_ts": 0.000852,
+          "samples_ns": [
+            6378876882,
+            6378391978,
+            6378425341
+          ],
+          "samples_ts": [
+            20.0662,
+            20.0678,
+            20.0676
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:02:28Z",
+          "avg_ns": 51205073208,
+          "stddev_ns": 1205304,
+          "avg_ts": 9.999009,
+          "stddev_ts": 0.000235,
+          "samples_ns": [
+            51204168413,
+            51204609762,
+            51206441449
+          ],
+          "samples_ts": [
+            9.99919,
+            9.9991,
+            9.99874
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 585
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:07:27.671172+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:05:03Z\",\n    \"avg_ns\": 26446192587,\n    \"stddev_ns\": 1438870,\n    \"avg_ts\": 19.360065,\n    \"stddev_ts\": 0.001040,\n    \"samples_ns\": [ 26447024926, 26447000292, 26444552545 ],\n    \"samples_ts\": [ 19.3595, 19.3595, 19.3613 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:06:49Z\",\n    \"avg_ns\": 12663761026,\n    \"stddev_ns\": 3786006,\n    \"avg_ts\": 10.107582,\n    \"stddev_ts\": 0.003020,\n    \"samples_ns\": [ 12660041857, 12667607320, 12663633902 ],\n    \"samples_ts\": [ 10.1106, 10.1045, 10.1077 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:05:03Z",
+          "avg_ns": 26446192587,
+          "stddev_ns": 1438870,
+          "avg_ts": 19.360065,
+          "stddev_ts": 0.00104,
+          "samples_ns": [
+            26447024926,
+            26447000292,
+            26444552545
+          ],
+          "samples_ts": [
+            19.3595,
+            19.3595,
+            19.3613
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:06:49Z",
+          "avg_ns": 12663761026,
+          "stddev_ns": 3786006,
+          "avg_ts": 10.107582,
+          "stddev_ts": 0.00302,
+          "samples_ns": [
+            12660041857,
+            12667607320,
+            12663633902
+          ],
+          "samples_ts": [
+            10.1106,
+            10.1045,
+            10.1077
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 586
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:11:47.684819+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:07:28Z\",\n    \"avg_ns\": 26456345675,\n    \"stddev_ns\": 2502525,\n    \"avg_ts\": 19.352635,\n    \"stddev_ts\": 0.001831,\n    \"samples_ns\": [ 26453612033, 26458523691, 26456901301 ],\n    \"samples_ts\": [ 19.3546, 19.351, 19.3522 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:09:14Z\",\n    \"avg_ns\": 51033249113,\n    \"stddev_ns\": 600559,\n    \"avg_ts\": 10.032675,\n    \"stddev_ts\": 0.000118,\n    \"samples_ns\": [ 51033694309, 51032566056, 51033486974 ],\n    \"samples_ts\": [ 10.0326, 10.0328, 10.0326 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:07:28Z",
+          "avg_ns": 26456345675,
+          "stddev_ns": 2502525,
+          "avg_ts": 19.352635,
+          "stddev_ts": 0.001831,
+          "samples_ns": [
+            26453612033,
+            26458523691,
+            26456901301
+          ],
+          "samples_ts": [
+            19.3546,
+            19.351,
+            19.3522
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:09:14Z",
+          "avg_ns": 51033249113,
+          "stddev_ns": 600559,
+          "avg_ts": 10.032675,
+          "stddev_ts": 0.000118,
+          "samples_ns": [
+            51033694309,
+            51032566056,
+            51033486974
+          ],
+          "samples_ts": [
+            10.0326,
+            10.0328,
+            10.0326
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 587
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:12:52.196105+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:11:48Z\",\n    \"avg_ns\": 6376922315,\n    \"stddev_ns\": 367019,\n    \"avg_ts\": 20.072379,\n    \"stddev_ts\": 0.001155,\n    \"samples_ns\": [ 6376675375, 6377344061, 6376747509 ],\n    \"samples_ts\": [ 20.0732, 20.0711, 20.0729 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:12:14Z\",\n    \"avg_ns\": 12632202516,\n    \"stddev_ns\": 1577512,\n    \"avg_ts\": 10.132833,\n    \"stddev_ts\": 0.001265,\n    \"samples_ns\": [ 12633982494, 12631647644, 12630977410 ],\n    \"samples_ts\": [ 10.1314, 10.1333, 10.1338 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:11:48Z",
+          "avg_ns": 6376922315,
+          "stddev_ns": 367019,
+          "avg_ts": 20.072379,
+          "stddev_ts": 0.001155,
+          "samples_ns": [
+            6376675375,
+            6377344061,
+            6376747509
+          ],
+          "samples_ts": [
+            20.0732,
+            20.0711,
+            20.0729
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:12:14Z",
+          "avg_ns": 12632202516,
+          "stddev_ns": 1577512,
+          "avg_ts": 10.132833,
+          "stddev_ts": 0.001265,
+          "samples_ns": [
+            12633982494,
+            12631647644,
+            12630977410
+          ],
+          "samples_ts": [
+            10.1314,
+            10.1333,
+            10.1338
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 588
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:15:52.041789+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:12:53Z\",\n    \"avg_ns\": 6376231026,\n    \"stddev_ns\": 301166,\n    \"avg_ts\": 20.074555,\n    \"stddev_ts\": 0.000914,\n    \"samples_ns\": [ 6375897762, 6376429689, 6376365628 ],\n    \"samples_ts\": [ 20.0756, 20.0739, 20.0741 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:13:18Z\",\n    \"avg_ns\": 51081109098,\n    \"stddev_ns\": 6497895,\n    \"avg_ts\": 10.023275,\n    \"stddev_ts\": 0.001274,\n    \"samples_ns\": [ 51074385242, 51081605229, 51087336825 ],\n    \"samples_ts\": [ 10.0246, 10.0232, 10.0221 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:12:53Z",
+          "avg_ns": 6376231026,
+          "stddev_ns": 301166,
+          "avg_ts": 20.074555,
+          "stddev_ts": 0.000914,
+          "samples_ns": [
+            6375897762,
+            6376429689,
+            6376365628
+          ],
+          "samples_ts": [
+            20.0756,
+            20.0739,
+            20.0741
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:13:18Z",
+          "avg_ns": 51081109098,
+          "stddev_ns": 6497895,
+          "avg_ts": 10.023275,
+          "stddev_ts": 0.001274,
+          "samples_ns": [
+            51074385242,
+            51081605229,
+            51087336825
+          ],
+          "samples_ts": [
+            10.0246,
+            10.0232,
+            10.0221
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 589
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:18:13.813561+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:15:52Z\",\n    \"avg_ns\": 25725391649,\n    \"stddev_ns\": 695710,\n    \"avg_ts\": 19.902515,\n    \"stddev_ts\": 0.000538,\n    \"samples_ns\": [ 25724627982, 25725557561, 25725989404 ],\n    \"samples_ts\": [ 19.9031, 19.9024, 19.9021 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:17:35Z\",\n    \"avg_ns\": 12591974344,\n    \"stddev_ns\": 964244,\n    \"avg_ts\": 10.165205,\n    \"stddev_ts\": 0.000778,\n    \"samples_ns\": [ 12593075403, 12591280570, 12591567059 ],\n    \"samples_ts\": [ 10.1643, 10.1658, 10.1655 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:15:52Z",
+          "avg_ns": 25725391649,
+          "stddev_ns": 695710,
+          "avg_ts": 19.902515,
+          "stddev_ts": 0.000538,
+          "samples_ns": [
+            25724627982,
+            25725557561,
+            25725989404
+          ],
+          "samples_ts": [
+            19.9031,
+            19.9024,
+            19.9021
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:17:35Z",
+          "avg_ns": 12591974344,
+          "stddev_ns": 964244,
+          "avg_ts": 10.165205,
+          "stddev_ts": 0.000778,
+          "samples_ns": [
+            12593075403,
+            12591280570,
+            12591567059
+          ],
+          "samples_ts": [
+            10.1643,
+            10.1658,
+            10.1655
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 590
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:22:30.938240+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:18:14Z\",\n    \"avg_ns\": 25721036284,\n    \"stddev_ns\": 874838,\n    \"avg_ts\": 19.905885,\n    \"stddev_ts\": 0.000677,\n    \"samples_ns\": [ 25721378442, 25721688332, 25720042078 ],\n    \"samples_ts\": [ 19.9056, 19.9054, 19.9067 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:19:57Z\",\n    \"avg_ns\": 51047023413,\n    \"stddev_ns\": 3297800,\n    \"avg_ts\": 10.029968,\n    \"stddev_ts\": 0.000646,\n    \"samples_ns\": [ 51043472537, 51047629278, 51049968425 ],\n    \"samples_ts\": [ 10.0307, 10.0298, 10.0294 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:18:14Z",
+          "avg_ns": 25721036284,
+          "stddev_ns": 874838,
+          "avg_ts": 19.905885,
+          "stddev_ts": 0.000677,
+          "samples_ns": [
+            25721378442,
+            25721688332,
+            25720042078
+          ],
+          "samples_ts": [
+            19.9056,
+            19.9054,
+            19.9067
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:19:57Z",
+          "avg_ns": 51047023413,
+          "stddev_ns": 3297800,
+          "avg_ts": 10.029968,
+          "stddev_ts": 0.000646,
+          "samples_ns": [
+            51043472537,
+            51047629278,
+            51049968425
+          ],
+          "samples_ts": [
+            10.0307,
+            10.0298,
+            10.0294
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 591
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:23:35.182732+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:22:31Z\",\n    \"avg_ns\": 6377330322,\n    \"stddev_ns\": 180851,\n    \"avg_ts\": 20.071095,\n    \"stddev_ts\": 0.000445,\n    \"samples_ns\": [ 6377484950, 6377297958, 6377208060 ],\n    \"samples_ts\": [ 20.0706, 20.0712, 20.0715 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:22:57Z\",\n    \"avg_ns\": 12552446511,\n    \"stddev_ns\": 237777,\n    \"avg_ts\": 10.197215,\n    \"stddev_ts\": 0.000170,\n    \"samples_ns\": [ 12552404113, 12552674199, 12552261222 ],\n    \"samples_ts\": [ 10.1972, 10.197, 10.1974 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:22:31Z",
+          "avg_ns": 6377330322,
+          "stddev_ns": 180851,
+          "avg_ts": 20.071095,
+          "stddev_ts": 0.000445,
+          "samples_ns": [
+            6377484950,
+            6377297958,
+            6377208060
+          ],
+          "samples_ts": [
+            20.0706,
+            20.0712,
+            20.0715
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:22:57Z",
+          "avg_ns": 12552446511,
+          "stddev_ns": 237777,
+          "avg_ts": 10.197215,
+          "stddev_ts": 0.00017,
+          "samples_ns": [
+            12552404113,
+            12552674199,
+            12552261222
+          ],
+          "samples_ts": [
+            10.1972,
+            10.197,
+            10.1974
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 592
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:26:35.424994+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:23:36Z\",\n    \"avg_ns\": 6374201843,\n    \"stddev_ns\": 140706,\n    \"avg_ts\": 20.080946,\n    \"stddev_ts\": 0.000265,\n    \"samples_ns\": [ 6374159633, 6374298537, 6374147361 ],\n    \"samples_ts\": [ 20.0811, 20.0806, 20.0811 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:24:01Z\",\n    \"avg_ns\": 51223523472,\n    \"stddev_ns\": 2299936,\n    \"avg_ts\": 9.995408,\n    \"stddev_ts\": 0.000447,\n    \"samples_ns\": [ 51223335189, 51221334656, 51225900572 ],\n    \"samples_ts\": [ 9.99544, 9.99583, 9.99494 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:23:36Z",
+          "avg_ns": 6374201843,
+          "stddev_ns": 140706,
+          "avg_ts": 20.080946,
+          "stddev_ts": 0.000265,
+          "samples_ns": [
+            6374159633,
+            6374298537,
+            6374147361
+          ],
+          "samples_ts": [
+            20.0811,
+            20.0806,
+            20.0811
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:24:01Z",
+          "avg_ns": 51223523472,
+          "stddev_ns": 2299936,
+          "avg_ts": 9.995408,
+          "stddev_ts": 0.000447,
+          "samples_ns": [
+            51223335189,
+            51221334656,
+            51225900572
+          ],
+          "samples_ts": [
+            9.99544,
+            9.99583,
+            9.99494
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 593
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:28:57.290958+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:26:36Z\",\n    \"avg_ns\": 25656099006,\n    \"stddev_ns\": 701674,\n    \"avg_ts\": 19.956268,\n    \"stddev_ts\": 0.000517,\n    \"samples_ns\": [ 25656380548, 25656575962, 25655340510 ],\n    \"samples_ts\": [ 19.956, 19.9559, 19.9569 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:28:18Z\",\n    \"avg_ns\": 12711406389,\n    \"stddev_ns\": 6201045,\n    \"avg_ts\": 10.069698,\n    \"stddev_ts\": 0.004910,\n    \"samples_ns\": [ 12705722454, 12710480440, 12718016275 ],\n    \"samples_ts\": [ 10.0742, 10.0704, 10.0645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:26:36Z",
+          "avg_ns": 25656099006,
+          "stddev_ns": 701674,
+          "avg_ts": 19.956268,
+          "stddev_ts": 0.000517,
+          "samples_ns": [
+            25656380548,
+            25656575962,
+            25655340510
+          ],
+          "samples_ts": [
+            19.956,
+            19.9559,
+            19.9569
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:28:18Z",
+          "avg_ns": 12711406389,
+          "stddev_ns": 6201045,
+          "avg_ts": 10.069698,
+          "stddev_ts": 0.00491,
+          "samples_ns": [
+            12705722454,
+            12710480440,
+            12718016275
+          ],
+          "samples_ts": [
+            10.0742,
+            10.0704,
+            10.0645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 594
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:33:14.942489+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:28:58Z\",\n    \"avg_ns\": 25647430548,\n    \"stddev_ns\": 412114,\n    \"avg_ts\": 19.963013,\n    \"stddev_ts\": 0.000296,\n    \"samples_ns\": [ 25647826618, 25647069611, 25647395416 ],\n    \"samples_ts\": [ 19.9627, 19.9633, 19.963 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:30:40Z\",\n    \"avg_ns\": 51323507704,\n    \"stddev_ns\": 1056116,\n    \"avg_ts\": 9.975935,\n    \"stddev_ts\": 0.000196,\n    \"samples_ns\": [ 51322348631, 51324158978, 51324015505 ],\n    \"samples_ts\": [ 9.97616, 9.97581, 9.97584 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:28:58Z",
+          "avg_ns": 25647430548,
+          "stddev_ns": 412114,
+          "avg_ts": 19.963013,
+          "stddev_ts": 0.000296,
+          "samples_ns": [
+            25647826618,
+            25647069611,
+            25647395416
+          ],
+          "samples_ts": [
+            19.9627,
+            19.9633,
+            19.963
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:30:40Z",
+          "avg_ns": 51323507704,
+          "stddev_ns": 1056116,
+          "avg_ts": 9.975935,
+          "stddev_ts": 0.000196,
+          "samples_ns": [
+            51322348631,
+            51324158978,
+            51324015505
+          ],
+          "samples_ts": [
+            9.97616,
+            9.97581,
+            9.97584
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 595
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:34:19.400000+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:33:15Z\",\n    \"avg_ns\": 6375097310,\n    \"stddev_ns\": 152505,\n    \"avg_ts\": 20.078125,\n    \"stddev_ts\": 0.000323,\n    \"samples_ns\": [ 6375162653, 6374979168, 6375150111 ],\n    \"samples_ts\": [ 20.0779, 20.0785, 20.078 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:33:41Z\",\n    \"avg_ns\": 12624602148,\n    \"stddev_ns\": 821341,\n    \"avg_ts\": 10.138933,\n    \"stddev_ts\": 0.000647,\n    \"samples_ns\": [ 12623678035, 12624970060, 12625158351 ],\n    \"samples_ts\": [ 10.1397, 10.1386, 10.1385 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:33:15Z",
+          "avg_ns": 6375097310,
+          "stddev_ns": 152505,
+          "avg_ts": 20.078125,
+          "stddev_ts": 0.000323,
+          "samples_ns": [
+            6375162653,
+            6374979168,
+            6375150111
+          ],
+          "samples_ts": [
+            20.0779,
+            20.0785,
+            20.078
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:33:41Z",
+          "avg_ns": 12624602148,
+          "stddev_ns": 821341,
+          "avg_ts": 10.138933,
+          "stddev_ts": 0.000647,
+          "samples_ns": [
+            12623678035,
+            12624970060,
+            12625158351
+          ],
+          "samples_ts": [
+            10.1397,
+            10.1386,
+            10.1385
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 596
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:37:20.501128+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:34:20Z\",\n    \"avg_ns\": 6376078481,\n    \"stddev_ns\": 116081,\n    \"avg_ts\": 20.075035,\n    \"stddev_ts\": 0.000265,\n    \"samples_ns\": [ 6376096552, 6376152235, 6375986657 ],\n    \"samples_ts\": [ 20.075, 20.0748, 20.0753 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:34:45Z\",\n    \"avg_ns\": 51502305232,\n    \"stddev_ns\": 1790451,\n    \"avg_ts\": 9.941303,\n    \"stddev_ts\": 0.000340,\n    \"samples_ns\": [ 51502901469, 51503691184, 51500323045 ],\n    \"samples_ts\": [ 9.94119, 9.94104, 9.94169 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:34:20Z",
+          "avg_ns": 6376078481,
+          "stddev_ns": 116081,
+          "avg_ts": 20.075035,
+          "stddev_ts": 0.000265,
+          "samples_ns": [
+            6376096552,
+            6376152235,
+            6375986657
+          ],
+          "samples_ts": [
+            20.075,
+            20.0748,
+            20.0753
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:34:45Z",
+          "avg_ns": 51502305232,
+          "stddev_ns": 1790451,
+          "avg_ts": 9.941303,
+          "stddev_ts": 0.00034,
+          "samples_ns": [
+            51502901469,
+            51503691184,
+            51500323045
+          ],
+          "samples_ts": [
+            9.94119,
+            9.94104,
+            9.94169
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 597
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:39:45.346649+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:37:21Z\",\n    \"avg_ns\": 26452702165,\n    \"stddev_ns\": 274400,\n    \"avg_ts\": 19.355301,\n    \"stddev_ts\": 0.000109,\n    \"samples_ns\": [ 26452742934, 26452827191, 26452536372 ],\n    \"samples_ts\": [ 19.3553, 19.3552, 19.3554 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:39:07Z\",\n    \"avg_ns\": 12627969767,\n    \"stddev_ns\": 577223,\n    \"avg_ts\": 10.136230,\n    \"stddev_ts\": 0.000454,\n    \"samples_ns\": [ 12627354777, 12628085170, 12628469355 ],\n    \"samples_ts\": [ 10.1367, 10.1361, 10.1358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:37:21Z",
+          "avg_ns": 26452702165,
+          "stddev_ns": 274400,
+          "avg_ts": 19.355301,
+          "stddev_ts": 0.000109,
+          "samples_ns": [
+            26452742934,
+            26452827191,
+            26452536372
+          ],
+          "samples_ts": [
+            19.3553,
+            19.3552,
+            19.3554
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:39:07Z",
+          "avg_ns": 12627969767,
+          "stddev_ns": 577223,
+          "avg_ts": 10.13623,
+          "stddev_ts": 0.000454,
+          "samples_ns": [
+            12627354777,
+            12628085170,
+            12628469355
+          ],
+          "samples_ts": [
+            10.1367,
+            10.1361,
+            10.1358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 598
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:44:05.327384+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:39:46Z\",\n    \"avg_ns\": 26466083870,\n    \"stddev_ns\": 1943770,\n    \"avg_ts\": 19.345514,\n    \"stddev_ts\": 0.001421,\n    \"samples_ns\": [ 26467606212, 26466751020, 26463894378 ],\n    \"samples_ts\": [ 19.3444, 19.345, 19.3471 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:41:32Z\",\n    \"avg_ns\": 51010426568,\n    \"stddev_ns\": 1063059,\n    \"avg_ts\": 10.037164,\n    \"stddev_ts\": 0.000209,\n    \"samples_ns\": [ 51011653864, 51009833016, 51009792824 ],\n    \"samples_ts\": [ 10.0369, 10.0373, 10.0373 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:39:46Z",
+          "avg_ns": 26466083870,
+          "stddev_ns": 1943770,
+          "avg_ts": 19.345514,
+          "stddev_ts": 0.001421,
+          "samples_ns": [
+            26467606212,
+            26466751020,
+            26463894378
+          ],
+          "samples_ts": [
+            19.3444,
+            19.345,
+            19.3471
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:41:32Z",
+          "avg_ns": 51010426568,
+          "stddev_ns": 1063059,
+          "avg_ts": 10.037164,
+          "stddev_ts": 0.000209,
+          "samples_ns": [
+            51011653864,
+            51009833016,
+            51009792824
+          ],
+          "samples_ts": [
+            10.0369,
+            10.0373,
+            10.0373
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 599
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:45:10.071378+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:44:06Z\",\n    \"avg_ns\": 6377135138,\n    \"stddev_ns\": 136923,\n    \"avg_ts\": 20.071709,\n    \"stddev_ts\": 0.000244,\n    \"samples_ns\": [ 6377186461, 6377046088, 6377172867 ],\n    \"samples_ts\": [ 20.0715, 20.072, 20.0716 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:44:31Z\",\n    \"avg_ns\": 12719567122,\n    \"stddev_ns\": 1636559,\n    \"avg_ts\": 10.063236,\n    \"stddev_ts\": 0.001292,\n    \"samples_ns\": [ 12717766368, 12719984208, 12720950791 ],\n    \"samples_ts\": [ 10.0647, 10.0629, 10.0621 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:44:06Z",
+          "avg_ns": 6377135138,
+          "stddev_ns": 136923,
+          "avg_ts": 20.071709,
+          "stddev_ts": 0.000244,
+          "samples_ns": [
+            6377186461,
+            6377046088,
+            6377172867
+          ],
+          "samples_ts": [
+            20.0715,
+            20.072,
+            20.0716
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:44:31Z",
+          "avg_ns": 12719567122,
+          "stddev_ns": 1636559,
+          "avg_ts": 10.063236,
+          "stddev_ts": 0.001292,
+          "samples_ns": [
+            12717766368,
+            12719984208,
+            12720950791
+          ],
+          "samples_ts": [
+            10.0647,
+            10.0629,
+            10.0621
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 600
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:48:10.239191+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:45:10Z\",\n    \"avg_ns\": 6376225585,\n    \"stddev_ns\": 153440,\n    \"avg_ts\": 20.074572,\n    \"stddev_ts\": 0.000483,\n    \"samples_ns\": [ 6376303130, 6376324777, 6376048848 ],\n    \"samples_ts\": [ 20.0743, 20.0743, 20.0751 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:45:36Z\",\n    \"avg_ns\": 51194900248,\n    \"stddev_ns\": 787543,\n    \"avg_ts\": 10.000996,\n    \"stddev_ts\": 0.000147,\n    \"samples_ns\": [ 51195750952, 51194312856, 51194636937 ],\n    \"samples_ts\": [ 10.0008, 10.0011, 10.001 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:45:10Z",
+          "avg_ns": 6376225585,
+          "stddev_ns": 153440,
+          "avg_ts": 20.074572,
+          "stddev_ts": 0.000483,
+          "samples_ns": [
+            6376303130,
+            6376324777,
+            6376048848
+          ],
+          "samples_ts": [
+            20.0743,
+            20.0743,
+            20.0751
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:45:36Z",
+          "avg_ns": 51194900248,
+          "stddev_ns": 787543,
+          "avg_ts": 10.000996,
+          "stddev_ts": 0.000147,
+          "samples_ns": [
+            51195750952,
+            51194312856,
+            51194636937
+          ],
+          "samples_ts": [
+            10.0008,
+            10.0011,
+            10.001
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 601
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:50:32.143887+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:48:11Z\",\n    \"avg_ns\": 25731523250,\n    \"stddev_ns\": 1147031,\n    \"avg_ts\": 19.897773,\n    \"stddev_ts\": 0.000869,\n    \"samples_ns\": [ 25732603281, 25731607230, 25730359241 ],\n    \"samples_ts\": [ 19.8969, 19.8977, 19.8987 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:49:54Z\",\n    \"avg_ns\": 12615284339,\n    \"stddev_ns\": 302938,\n    \"avg_ts\": 10.146422,\n    \"stddev_ts\": 0.000226,\n    \"samples_ns\": [ 12615486580, 12615403402, 12614963036 ],\n    \"samples_ts\": [ 10.1463, 10.1463, 10.1467 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:48:11Z",
+          "avg_ns": 25731523250,
+          "stddev_ns": 1147031,
+          "avg_ts": 19.897773,
+          "stddev_ts": 0.000869,
+          "samples_ns": [
+            25732603281,
+            25731607230,
+            25730359241
+          ],
+          "samples_ts": [
+            19.8969,
+            19.8977,
+            19.8987
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:49:54Z",
+          "avg_ns": 12615284339,
+          "stddev_ns": 302938,
+          "avg_ts": 10.146422,
+          "stddev_ts": 0.000226,
+          "samples_ns": [
+            12615486580,
+            12615403402,
+            12614963036
+          ],
+          "samples_ts": [
+            10.1463,
+            10.1463,
+            10.1467
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 602
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:54:50.196105+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:50:33Z\",\n    \"avg_ns\": 25704739624,\n    \"stddev_ns\": 341935,\n    \"avg_ts\": 19.918506,\n    \"stddev_ts\": 0.000198,\n    \"samples_ns\": [ 25705035168, 25704590907, 25704592799 ],\n    \"samples_ts\": [ 19.9183, 19.9186, 19.9186 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:52:15Z\",\n    \"avg_ns\": 51386428871,\n    \"stddev_ns\": 4107642,\n    \"avg_ts\": 9.963720,\n    \"stddev_ts\": 0.000795,\n    \"samples_ns\": [ 51383000069, 51385314169, 51390972376 ],\n    \"samples_ts\": [ 9.96439, 9.96394, 9.96284 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:50:33Z",
+          "avg_ns": 25704739624,
+          "stddev_ns": 341935,
+          "avg_ts": 19.918506,
+          "stddev_ts": 0.000198,
+          "samples_ns": [
+            25705035168,
+            25704590907,
+            25704592799
+          ],
+          "samples_ts": [
+            19.9183,
+            19.9186,
+            19.9186
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:52:15Z",
+          "avg_ns": 51386428871,
+          "stddev_ns": 4107642,
+          "avg_ts": 9.96372,
+          "stddev_ts": 0.000795,
+          "samples_ns": [
+            51383000069,
+            51385314169,
+            51390972376
+          ],
+          "samples_ts": [
+            9.96439,
+            9.96394,
+            9.96284
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 603
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:55:54.883650+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:54:51Z\",\n    \"avg_ns\": 6376594556,\n    \"stddev_ns\": 485595,\n    \"avg_ts\": 20.073411,\n    \"stddev_ts\": 0.001529,\n    \"samples_ns\": [ 6376975490, 6376760417, 6376047761 ],\n    \"samples_ts\": [ 20.0722, 20.0729, 20.0751 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:55:16Z\",\n    \"avg_ns\": 12701645938,\n    \"stddev_ns\": 3676712,\n    \"avg_ts\": 10.077435,\n    \"stddev_ts\": 0.002915,\n    \"samples_ns\": [ 12705857488, 12699990329, 12699089998 ],\n    \"samples_ts\": [ 10.0741, 10.0787, 10.0795 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:54:51Z",
+          "avg_ns": 6376594556,
+          "stddev_ns": 485595,
+          "avg_ts": 20.073411,
+          "stddev_ts": 0.001529,
+          "samples_ns": [
+            6376975490,
+            6376760417,
+            6376047761
+          ],
+          "samples_ts": [
+            20.0722,
+            20.0729,
+            20.0751
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:55:16Z",
+          "avg_ns": 12701645938,
+          "stddev_ns": 3676712,
+          "avg_ts": 10.077435,
+          "stddev_ts": 0.002915,
+          "samples_ns": [
+            12705857488,
+            12699990329,
+            12699089998
+          ],
+          "samples_ts": [
+            10.0741,
+            10.0787,
+            10.0795
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 604
+    },
+    {
+      "timestamp_utc": "2025-12-09T07:58:55.292634+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:55:55Z\",\n    \"avg_ns\": 6376616629,\n    \"stddev_ns\": 100093,\n    \"avg_ts\": 20.073341,\n    \"stddev_ts\": 0.000315,\n    \"samples_ns\": [ 6376511246, 6376628217, 6376710424 ],\n    \"samples_ts\": [ 20.0737, 20.0733, 20.073 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:56:21Z\",\n    \"avg_ns\": 51270597229,\n    \"stddev_ns\": 3137723,\n    \"avg_ts\": 9.986230,\n    \"stddev_ts\": 0.000611,\n    \"samples_ns\": [ 51267903943, 51274042673, 51269845071 ],\n    \"samples_ts\": [ 9.98676, 9.98556, 9.98638 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:55:55Z",
+          "avg_ns": 6376616629,
+          "stddev_ns": 100093,
+          "avg_ts": 20.073341,
+          "stddev_ts": 0.000315,
+          "samples_ns": [
+            6376511246,
+            6376628217,
+            6376710424
+          ],
+          "samples_ts": [
+            20.0737,
+            20.0733,
+            20.073
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:56:21Z",
+          "avg_ns": 51270597229,
+          "stddev_ns": 3137723,
+          "avg_ts": 9.98623,
+          "stddev_ts": 0.000611,
+          "samples_ns": [
+            51267903943,
+            51274042673,
+            51269845071
+          ],
+          "samples_ts": [
+            9.98676,
+            9.98556,
+            9.98638
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 605
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:01:16.791493+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T07:58:56Z\",\n    \"avg_ns\": 25649951229,\n    \"stddev_ns\": 627001,\n    \"avg_ts\": 19.961052,\n    \"stddev_ts\": 0.000488,\n    \"samples_ns\": [ 25649871862, 25650614135, 25649367690 ],\n    \"samples_ts\": [ 19.9611, 19.9605, 19.9615 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:00:38Z\",\n    \"avg_ns\": 12606877767,\n    \"stddev_ns\": 881508,\n    \"avg_ts\": 10.153188,\n    \"stddev_ts\": 0.000698,\n    \"samples_ns\": [ 12606943818, 12607709943, 12605979542 ],\n    \"samples_ts\": [ 10.1531, 10.1525, 10.1539 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T07:58:56Z",
+          "avg_ns": 25649951229,
+          "stddev_ns": 627001,
+          "avg_ts": 19.961052,
+          "stddev_ts": 0.000488,
+          "samples_ns": [
+            25649871862,
+            25650614135,
+            25649367690
+          ],
+          "samples_ts": [
+            19.9611,
+            19.9605,
+            19.9615
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:00:38Z",
+          "avg_ns": 12606877767,
+          "stddev_ns": 881508,
+          "avg_ts": 10.153188,
+          "stddev_ts": 0.000698,
+          "samples_ns": [
+            12606943818,
+            12607709943,
+            12605979542
+          ],
+          "samples_ts": [
+            10.1531,
+            10.1525,
+            10.1539
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 606
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:05:33.619019+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:01:17Z\",\n    \"avg_ns\": 25639819267,\n    \"stddev_ns\": 2033252,\n    \"avg_ts\": 19.968940,\n    \"stddev_ts\": 0.001574,\n    \"samples_ns\": [ 25639486755, 25637985546, 25641985502 ],\n    \"samples_ts\": [ 19.9692, 19.9704, 19.9673 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:03:00Z\",\n    \"avg_ns\": 51047429391,\n    \"stddev_ns\": 938772,\n    \"avg_ts\": 10.029888,\n    \"stddev_ts\": 0.000179,\n    \"samples_ns\": [ 51047132503, 51046703685, 51048451986 ],\n    \"samples_ts\": [ 10.0299, 10.03, 10.0297 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:01:17Z",
+          "avg_ns": 25639819267,
+          "stddev_ns": 2033252,
+          "avg_ts": 19.96894,
+          "stddev_ts": 0.001574,
+          "samples_ns": [
+            25639486755,
+            25637985546,
+            25641985502
+          ],
+          "samples_ts": [
+            19.9692,
+            19.9704,
+            19.9673
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:03:00Z",
+          "avg_ns": 51047429391,
+          "stddev_ns": 938772,
+          "avg_ts": 10.029888,
+          "stddev_ts": 0.000179,
+          "samples_ns": [
+            51047132503,
+            51046703685,
+            51048451986
+          ],
+          "samples_ts": [
+            10.0299,
+            10.03,
+            10.0297
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 607
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:06:38.116846+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:05:34Z\",\n    \"avg_ns\": 6375177771,\n    \"stddev_ns\": 238622,\n    \"avg_ts\": 20.077872,\n    \"stddev_ts\": 0.000662,\n    \"samples_ns\": [ 6375120661, 6375002013, 6375410641 ],\n    \"samples_ts\": [ 20.0781, 20.0784, 20.0771 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:06:00Z\",\n    \"avg_ns\": 12635129360,\n    \"stddev_ns\": 1229392,\n    \"avg_ts\": 10.130486,\n    \"stddev_ts\": 0.000982,\n    \"samples_ns\": [ 12635792239, 12635879223, 12633716619 ],\n    \"samples_ts\": [ 10.13, 10.1299, 10.1316 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:05:34Z",
+          "avg_ns": 6375177771,
+          "stddev_ns": 238622,
+          "avg_ts": 20.077872,
+          "stddev_ts": 0.000662,
+          "samples_ns": [
+            6375120661,
+            6375002013,
+            6375410641
+          ],
+          "samples_ts": [
+            20.0781,
+            20.0784,
+            20.0771
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:06:00Z",
+          "avg_ns": 12635129360,
+          "stddev_ns": 1229392,
+          "avg_ts": 10.130486,
+          "stddev_ts": 0.000982,
+          "samples_ns": [
+            12635792239,
+            12635879223,
+            12633716619
+          ],
+          "samples_ts": [
+            10.13,
+            10.1299,
+            10.1316
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 608
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:09:38.546044+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:06:39Z\",\n    \"avg_ns\": 6377731866,\n    \"stddev_ns\": 182161,\n    \"avg_ts\": 20.069831,\n    \"stddev_ts\": 0.000573,\n    \"samples_ns\": [ 6377708738, 6377562373, 6377924487 ],\n    \"samples_ts\": [ 20.0699, 20.0704, 20.0692 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:07:04Z\",\n    \"avg_ns\": 51280561146,\n    \"stddev_ns\": 887077,\n    \"avg_ts\": 9.984290,\n    \"stddev_ts\": 0.000167,\n    \"samples_ns\": [ 51279721142, 51280526814, 51281435483 ],\n    \"samples_ts\": [ 9.98445, 9.9843, 9.98412 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:06:39Z",
+          "avg_ns": 6377731866,
+          "stddev_ns": 182161,
+          "avg_ts": 20.069831,
+          "stddev_ts": 0.000573,
+          "samples_ns": [
+            6377708738,
+            6377562373,
+            6377924487
+          ],
+          "samples_ts": [
+            20.0699,
+            20.0704,
+            20.0692
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:07:04Z",
+          "avg_ns": 51280561146,
+          "stddev_ns": 887077,
+          "avg_ts": 9.98429,
+          "stddev_ts": 0.000167,
+          "samples_ns": [
+            51279721142,
+            51280526814,
+            51281435483
+          ],
+          "samples_ts": [
+            9.98445,
+            9.9843,
+            9.98412
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 609
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:12:03.511659+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:09:39Z\",\n    \"avg_ns\": 26464912112,\n    \"stddev_ns\": 1355330,\n    \"avg_ts\": 19.346371,\n    \"stddev_ts\": 0.000991,\n    \"samples_ns\": [ 26464684965, 26466366664, 26463684707 ],\n    \"samples_ts\": [ 19.3465, 19.3453, 19.3473 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:11:25Z\",\n    \"avg_ns\": 12651817639,\n    \"stddev_ns\": 721197,\n    \"avg_ts\": 10.117123,\n    \"stddev_ts\": 0.000570,\n    \"samples_ns\": [ 12650995063, 12652228028, 12652229827 ],\n    \"samples_ts\": [ 10.1178, 10.1168, 10.1168 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:09:39Z",
+          "avg_ns": 26464912112,
+          "stddev_ns": 1355330,
+          "avg_ts": 19.346371,
+          "stddev_ts": 0.000991,
+          "samples_ns": [
+            26464684965,
+            26466366664,
+            26463684707
+          ],
+          "samples_ts": [
+            19.3465,
+            19.3453,
+            19.3473
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:11:25Z",
+          "avg_ns": 12651817639,
+          "stddev_ns": 721197,
+          "avg_ts": 10.117123,
+          "stddev_ts": 0.00057,
+          "samples_ns": [
+            12650995063,
+            12652228028,
+            12652229827
+          ],
+          "samples_ts": [
+            10.1178,
+            10.1168,
+            10.1168
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 610
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:16:24.669350+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:12:04Z\",\n    \"avg_ns\": 26455033170,\n    \"stddev_ns\": 314869,\n    \"avg_ts\": 19.353595,\n    \"stddev_ts\": 0.000197,\n    \"samples_ns\": [ 26454737790, 26455095711, 26455266010 ],\n    \"samples_ts\": [ 19.3538, 19.3535, 19.3534 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:13:50Z\",\n    \"avg_ns\": 51401834255,\n    \"stddev_ns\": 723132,\n    \"avg_ts\": 9.960734,\n    \"stddev_ts\": 0.000140,\n    \"samples_ns\": [ 51402632665, 51401646772, 51401223328 ],\n    \"samples_ts\": [ 9.96058, 9.96077, 9.96085 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:12:04Z",
+          "avg_ns": 26455033170,
+          "stddev_ns": 314869,
+          "avg_ts": 19.353595,
+          "stddev_ts": 0.000197,
+          "samples_ns": [
+            26454737790,
+            26455095711,
+            26455266010
+          ],
+          "samples_ts": [
+            19.3538,
+            19.3535,
+            19.3534
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:13:50Z",
+          "avg_ns": 51401834255,
+          "stddev_ns": 723132,
+          "avg_ts": 9.960734,
+          "stddev_ts": 0.00014,
+          "samples_ns": [
+            51402632665,
+            51401646772,
+            51401223328
+          ],
+          "samples_ts": [
+            9.96058,
+            9.96077,
+            9.96085
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 611
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:17:11.419049+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:16:25Z\",\n    \"avg_ns\": 3242594963,\n    \"stddev_ns\": 589659,\n    \"avg_ts\": 39.474558,\n    \"stddev_ts\": 0.007145,\n    \"samples_ns\": [ 3242745012, 3241947602, 3243092276 ],\n    \"samples_ts\": [ 39.4727, 39.4824, 39.4685 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:16:38Z\",\n    \"avg_ns\": 10906505745,\n    \"stddev_ns\": 3421348,\n    \"avg_ts\": 11.736115,\n    \"stddev_ts\": 0.003678,\n    \"samples_ns\": [ 10906525024, 10903077988, 10909914225 ],\n    \"samples_ts\": [ 11.7361, 11.7398, 11.7324 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:16:25Z",
+          "avg_ns": 3242594963,
+          "stddev_ns": 589659,
+          "avg_ts": 39.474558,
+          "stddev_ts": 0.007145,
+          "samples_ns": [
+            3242745012,
+            3241947602,
+            3243092276
+          ],
+          "samples_ts": [
+            39.4727,
+            39.4824,
+            39.4685
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:16:38Z",
+          "avg_ns": 10906505745,
+          "stddev_ns": 3421348,
+          "avg_ts": 11.736115,
+          "stddev_ts": 0.003678,
+          "samples_ns": [
+            10906525024,
+            10903077988,
+            10909914225
+          ],
+          "samples_ts": [
+            11.7361,
+            11.7398,
+            11.7324
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 612
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:19:37.635318+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:17:12Z\",\n    \"avg_ns\": 3246661858,\n    \"stddev_ns\": 332409,\n    \"avg_ts\": 39.425110,\n    \"stddev_ts\": 0.003977,\n    \"samples_ns\": [ 3246901317, 3246795593, 3246288665 ],\n    \"samples_ts\": [ 39.4222, 39.4235, 39.4296 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:17:25Z\",\n    \"avg_ns\": 44054952084,\n    \"stddev_ns\": 2376560,\n    \"avg_ts\": 11.621849,\n    \"stddev_ts\": 0.000625,\n    \"samples_ns\": [ 44056229726, 44052220488, 44056406039 ],\n    \"samples_ts\": [ 11.6215, 11.6226, 11.6215 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:17:12Z",
+          "avg_ns": 3246661858,
+          "stddev_ns": 332409,
+          "avg_ts": 39.42511,
+          "stddev_ts": 0.003977,
+          "samples_ns": [
+            3246901317,
+            3246795593,
+            3246288665
+          ],
+          "samples_ts": [
+            39.4222,
+            39.4235,
+            39.4296
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:17:25Z",
+          "avg_ns": 44054952084,
+          "stddev_ns": 2376560,
+          "avg_ts": 11.621849,
+          "stddev_ts": 0.000625,
+          "samples_ns": [
+            44056229726,
+            44052220488,
+            44056406039
+          ],
+          "samples_ts": [
+            11.6215,
+            11.6226,
+            11.6215
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 613
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:21:03.945836+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:19:38Z\",\n    \"avg_ns\": 13072195690,\n    \"stddev_ns\": 1026337,\n    \"avg_ts\": 39.167100,\n    \"stddev_ts\": 0.003037,\n    \"samples_ns\": [ 13072995440, 13072535765, 13071055867 ],\n    \"samples_ts\": [ 39.1647, 39.1661, 39.1705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:20:30Z\",\n    \"avg_ns\": 10972280397,\n    \"stddev_ns\": 2109690,\n    \"avg_ts\": 11.665761,\n    \"stddev_ts\": 0.002243,\n    \"samples_ns\": [ 10973593015, 10969846851, 10973401325 ],\n    \"samples_ts\": [ 11.6644, 11.6683, 11.6646 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:19:38Z",
+          "avg_ns": 13072195690,
+          "stddev_ns": 1026337,
+          "avg_ts": 39.1671,
+          "stddev_ts": 0.003037,
+          "samples_ns": [
+            13072995440,
+            13072535765,
+            13071055867
+          ],
+          "samples_ts": [
+            39.1647,
+            39.1661,
+            39.1705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:20:30Z",
+          "avg_ns": 10972280397,
+          "stddev_ns": 2109690,
+          "avg_ts": 11.665761,
+          "stddev_ts": 0.002243,
+          "samples_ns": [
+            10973593015,
+            10969846851,
+            10973401325
+          ],
+          "samples_ts": [
+            11.6644,
+            11.6683,
+            11.6646
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 614
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:24:09.869104+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:21:04Z\",\n    \"avg_ns\": 13067848485,\n    \"stddev_ns\": 828829,\n    \"avg_ts\": 39.180130,\n    \"stddev_ts\": 0.002437,\n    \"samples_ns\": [ 13067831940, 13068669542, 13067043975 ],\n    \"samples_ts\": [ 39.1802, 39.1777, 39.1825 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:21:57Z\",\n    \"avg_ns\": 44177068596,\n    \"stddev_ns\": 9271915,\n    \"avg_ts\": 11.589724,\n    \"stddev_ts\": 0.002431,\n    \"samples_ns\": [ 44169922204, 44187539391, 44173744195 ],\n    \"samples_ts\": [ 11.5916, 11.587, 11.5906 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:21:04Z",
+          "avg_ns": 13067848485,
+          "stddev_ns": 828829,
+          "avg_ts": 39.18013,
+          "stddev_ts": 0.002437,
+          "samples_ns": [
+            13067831940,
+            13068669542,
+            13067043975
+          ],
+          "samples_ts": [
+            39.1802,
+            39.1777,
+            39.1825
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:21:57Z",
+          "avg_ns": 44177068596,
+          "stddev_ns": 9271915,
+          "avg_ts": 11.589724,
+          "stddev_ts": 0.002431,
+          "samples_ns": [
+            44169922204,
+            44187539391,
+            44173744195
+          ],
+          "samples_ts": [
+            11.5916,
+            11.587,
+            11.5906
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 615
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:24:56.643838+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:24:10Z\",\n    \"avg_ns\": 3244733190,\n    \"stddev_ns\": 128753,\n    \"avg_ts\": 39.448544,\n    \"stddev_ts\": 0.001221,\n    \"samples_ns\": [ 3244636857, 3244725431, 3244837284 ],\n    \"samples_ts\": [ 39.4497, 39.4486, 39.4473 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:24:23Z\",\n    \"avg_ns\": 10907206829,\n    \"stddev_ns\": 1553848,\n    \"avg_ts\": 11.735360,\n    \"stddev_ts\": 0.001668,\n    \"samples_ns\": [ 10908157035, 10905417813, 10908045640 ],\n    \"samples_ts\": [ 11.7343, 11.7373, 11.7345 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:24:10Z",
+          "avg_ns": 3244733190,
+          "stddev_ns": 128753,
+          "avg_ts": 39.448544,
+          "stddev_ts": 0.001221,
+          "samples_ns": [
+            3244636857,
+            3244725431,
+            3244837284
+          ],
+          "samples_ts": [
+            39.4497,
+            39.4486,
+            39.4473
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:24:23Z",
+          "avg_ns": 10907206829,
+          "stddev_ns": 1553848,
+          "avg_ts": 11.73536,
+          "stddev_ts": 0.001668,
+          "samples_ns": [
+            10908157035,
+            10905417813,
+            10908045640
+          ],
+          "samples_ts": [
+            11.7343,
+            11.7373,
+            11.7345
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 616
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:27:22.914789+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:24:57Z\",\n    \"avg_ns\": 3245818901,\n    \"stddev_ns\": 328789,\n    \"avg_ts\": 39.435349,\n    \"stddev_ts\": 0.003934,\n    \"samples_ns\": [ 3245975253, 3246034872, 3245446579 ],\n    \"samples_ts\": [ 39.4334, 39.4327, 39.4399 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:25:10Z\",\n    \"avg_ns\": 44056108569,\n    \"stddev_ns\": 4086716,\n    \"avg_ts\": 11.621544,\n    \"stddev_ts\": 0.001077,\n    \"samples_ns\": [ 44052418370, 44060492176, 44055415162 ],\n    \"samples_ts\": [ 11.6225, 11.6204, 11.6217 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:24:57Z",
+          "avg_ns": 3245818901,
+          "stddev_ns": 328789,
+          "avg_ts": 39.435349,
+          "stddev_ts": 0.003934,
+          "samples_ns": [
+            3245975253,
+            3246034872,
+            3245446579
+          ],
+          "samples_ts": [
+            39.4334,
+            39.4327,
+            39.4399
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:25:10Z",
+          "avg_ns": 44056108569,
+          "stddev_ns": 4086716,
+          "avg_ts": 11.621544,
+          "stddev_ts": 0.001077,
+          "samples_ns": [
+            44052418370,
+            44060492176,
+            44055415162
+          ],
+          "samples_ts": [
+            11.6225,
+            11.6204,
+            11.6217
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 617
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:28:49.086456+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:27:23Z\",\n    \"avg_ns\": 13085992678,\n    \"stddev_ns\": 1136135,\n    \"avg_ts\": 39.125805,\n    \"stddev_ts\": 0.003397,\n    \"samples_ns\": [ 13086997615, 13086220539, 13084759880 ],\n    \"samples_ts\": [ 39.1228, 39.1251, 39.1295 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:28:16Z\",\n    \"avg_ns\": 10908745742,\n    \"stddev_ns\": 1901902,\n    \"avg_ts\": 11.733705,\n    \"stddev_ts\": 0.002046,\n    \"samples_ns\": [ 10908625410, 10906906863, 10910704953 ],\n    \"samples_ts\": [ 11.7338, 11.7357, 11.7316 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:27:23Z",
+          "avg_ns": 13085992678,
+          "stddev_ns": 1136135,
+          "avg_ts": 39.125805,
+          "stddev_ts": 0.003397,
+          "samples_ns": [
+            13086997615,
+            13086220539,
+            13084759880
+          ],
+          "samples_ts": [
+            39.1228,
+            39.1251,
+            39.1295
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:28:16Z",
+          "avg_ns": 10908745742,
+          "stddev_ns": 1901902,
+          "avg_ts": 11.733705,
+          "stddev_ts": 0.002046,
+          "samples_ns": [
+            10908625410,
+            10906906863,
+            10910704953
+          ],
+          "samples_ts": [
+            11.7338,
+            11.7357,
+            11.7316
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 618
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:31:54.712548+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:28:50Z\",\n    \"avg_ns\": 13088945973,\n    \"stddev_ns\": 582598,\n    \"avg_ts\": 39.116977,\n    \"stddev_ts\": 0.001673,\n    \"samples_ns\": [ 13089566305, 13088478854, 13088792762 ],\n    \"samples_ts\": [ 39.1151, 39.1184, 39.1174 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:29:42Z\",\n    \"avg_ns\": 44044474021,\n    \"stddev_ns\": 6439614,\n    \"avg_ts\": 11.624614,\n    \"stddev_ts\": 0.001699,\n    \"samples_ns\": [ 44051886996, 44040308792, 44041226276 ],\n    \"samples_ts\": [ 11.6227, 11.6257, 11.6255 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:28:50Z",
+          "avg_ns": 13088945973,
+          "stddev_ns": 582598,
+          "avg_ts": 39.116977,
+          "stddev_ts": 0.001673,
+          "samples_ns": [
+            13089566305,
+            13088478854,
+            13088792762
+          ],
+          "samples_ts": [
+            39.1151,
+            39.1184,
+            39.1174
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:29:42Z",
+          "avg_ns": 44044474021,
+          "stddev_ns": 6439614,
+          "avg_ts": 11.624614,
+          "stddev_ts": 0.001699,
+          "samples_ns": [
+            44051886996,
+            44040308792,
+            44041226276
+          ],
+          "samples_ts": [
+            11.6227,
+            11.6257,
+            11.6255
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 619
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:32:41.537941+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:31:55Z\",\n    \"avg_ns\": 3241817954,\n    \"stddev_ns\": 415737,\n    \"avg_ts\": 39.484019,\n    \"stddev_ts\": 0.005063,\n    \"samples_ns\": [ 3241479905, 3242282156, 3241691801 ],\n    \"samples_ts\": [ 39.4881, 39.4784, 39.4856 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:32:08Z\",\n    \"avg_ns\": 10913749054,\n    \"stddev_ns\": 2383562,\n    \"avg_ts\": 11.728326,\n    \"stddev_ts\": 0.002556,\n    \"samples_ns\": [ 10912249031, 10912506081, 10916492052 ],\n    \"samples_ts\": [ 11.7299, 11.7297, 11.7254 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:31:55Z",
+          "avg_ns": 3241817954,
+          "stddev_ns": 415737,
+          "avg_ts": 39.484019,
+          "stddev_ts": 0.005063,
+          "samples_ns": [
+            3241479905,
+            3242282156,
+            3241691801
+          ],
+          "samples_ts": [
+            39.4881,
+            39.4784,
+            39.4856
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:32:08Z",
+          "avg_ns": 10913749054,
+          "stddev_ns": 2383562,
+          "avg_ts": 11.728326,
+          "stddev_ts": 0.002556,
+          "samples_ns": [
+            10912249031,
+            10912506081,
+            10916492052
+          ],
+          "samples_ts": [
+            11.7299,
+            11.7297,
+            11.7254
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 620
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:35:07.704215+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:32:42Z\",\n    \"avg_ns\": 3243087914,\n    \"stddev_ns\": 398416,\n    \"avg_ts\": 39.468557,\n    \"stddev_ts\": 0.004799,\n    \"samples_ns\": [ 3243101278, 3242687077, 3243475388 ],\n    \"samples_ts\": [ 39.4684, 39.4734, 39.4638 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:32:55Z\",\n    \"avg_ns\": 44033548013,\n    \"stddev_ns\": 2407896,\n    \"avg_ts\": 11.627498,\n    \"stddev_ts\": 0.000633,\n    \"samples_ns\": [ 44033598508, 44035921103, 44031124429 ],\n    \"samples_ts\": [ 11.6275, 11.6269, 11.6281 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:32:42Z",
+          "avg_ns": 3243087914,
+          "stddev_ns": 398416,
+          "avg_ts": 39.468557,
+          "stddev_ts": 0.004799,
+          "samples_ns": [
+            3243101278,
+            3242687077,
+            3243475388
+          ],
+          "samples_ts": [
+            39.4684,
+            39.4734,
+            39.4638
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:32:55Z",
+          "avg_ns": 44033548013,
+          "stddev_ns": 2407896,
+          "avg_ts": 11.627498,
+          "stddev_ts": 0.000633,
+          "samples_ns": [
+            44033598508,
+            44035921103,
+            44031124429
+          ],
+          "samples_ts": [
+            11.6275,
+            11.6269,
+            11.6281
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 621
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:36:35.887514+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:35:08Z\",\n    \"avg_ns\": 13529582255,\n    \"stddev_ns\": 572716,\n    \"avg_ts\": 37.843001,\n    \"stddev_ts\": 0.001534,\n    \"samples_ns\": [ 13529792442, 13528959658, 13529994667 ],\n    \"samples_ts\": [ 37.8424, 37.8447, 37.8418 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:36:02Z\",\n    \"avg_ns\": 10988688743,\n    \"stddev_ns\": 2856768,\n    \"avg_ts\": 11.648342,\n    \"stddev_ts\": 0.003028,\n    \"samples_ns\": [ 10988377494, 10991688391, 10986000344 ],\n    \"samples_ts\": [ 11.6487, 11.6452, 11.6512 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:35:08Z",
+          "avg_ns": 13529582255,
+          "stddev_ns": 572716,
+          "avg_ts": 37.843001,
+          "stddev_ts": 0.001534,
+          "samples_ns": [
+            13529792442,
+            13528959658,
+            13529994667
+          ],
+          "samples_ts": [
+            37.8424,
+            37.8447,
+            37.8418
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:36:02Z",
+          "avg_ns": 10988688743,
+          "stddev_ns": 2856768,
+          "avg_ts": 11.648342,
+          "stddev_ts": 0.003028,
+          "samples_ns": [
+            10988377494,
+            10991688391,
+            10986000344
+          ],
+          "samples_ts": [
+            11.6487,
+            11.6452,
+            11.6512
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 622
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:39:43.274643+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:36:36Z\",\n    \"avg_ns\": 13507689127,\n    \"stddev_ns\": 485237,\n    \"avg_ts\": 37.904337,\n    \"stddev_ts\": 0.001322,\n    \"samples_ns\": [ 13507818321, 13507166896, 13508082165 ],\n    \"samples_ts\": [ 37.904, 37.9058, 37.9032 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:37:30Z\",\n    \"avg_ns\": 44081123009,\n    \"stddev_ns\": 1934827,\n    \"avg_ts\": 11.614949,\n    \"stddev_ts\": 0.000507,\n    \"samples_ns\": [ 44081150223, 44079186145, 44083032660 ],\n    \"samples_ts\": [ 11.6149, 11.6155, 11.6144 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:36:36Z",
+          "avg_ns": 13507689127,
+          "stddev_ns": 485237,
+          "avg_ts": 37.904337,
+          "stddev_ts": 0.001322,
+          "samples_ns": [
+            13507818321,
+            13507166896,
+            13508082165
+          ],
+          "samples_ts": [
+            37.904,
+            37.9058,
+            37.9032
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:37:30Z",
+          "avg_ns": 44081123009,
+          "stddev_ns": 1934827,
+          "avg_ts": 11.614949,
+          "stddev_ts": 0.000507,
+          "samples_ns": [
+            44081150223,
+            44079186145,
+            44083032660
+          ],
+          "samples_ts": [
+            11.6149,
+            11.6155,
+            11.6144
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 623
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:40:30.052621+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:39:44Z\",\n    \"avg_ns\": 3242454579,\n    \"stddev_ns\": 231517,\n    \"avg_ts\": 39.476266,\n    \"stddev_ts\": 0.002819,\n    \"samples_ns\": [ 3242526496, 3242195638, 3242641603 ],\n    \"samples_ts\": [ 39.4754, 39.4794, 39.474 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:39:57Z\",\n    \"avg_ns\": 10905052718,\n    \"stddev_ns\": 2194196,\n    \"avg_ts\": 11.737679,\n    \"stddev_ts\": 0.002362,\n    \"samples_ns\": [ 10906776421, 10905799013, 10902582720 ],\n    \"samples_ts\": [ 11.7358, 11.7369, 11.7403 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:39:44Z",
+          "avg_ns": 3242454579,
+          "stddev_ns": 231517,
+          "avg_ts": 39.476266,
+          "stddev_ts": 0.002819,
+          "samples_ns": [
+            3242526496,
+            3242195638,
+            3242641603
+          ],
+          "samples_ts": [
+            39.4754,
+            39.4794,
+            39.474
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:39:57Z",
+          "avg_ns": 10905052718,
+          "stddev_ns": 2194196,
+          "avg_ts": 11.737679,
+          "stddev_ts": 0.002362,
+          "samples_ns": [
+            10906776421,
+            10905799013,
+            10902582720
+          ],
+          "samples_ts": [
+            11.7358,
+            11.7369,
+            11.7403
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 624
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:42:56.340766+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:40:30Z\",\n    \"avg_ns\": 3246810670,\n    \"stddev_ns\": 401638,\n    \"avg_ts\": 39.423303,\n    \"stddev_ts\": 0.004877,\n    \"samples_ns\": [ 3246367772, 3247151259, 3246912979 ],\n    \"samples_ts\": [ 39.4287, 39.4192, 39.4221 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:40:43Z\",\n    \"avg_ns\": 44069128854,\n    \"stddev_ns\": 3034030,\n    \"avg_ts\": 11.618110,\n    \"stddev_ts\": 0.000800,\n    \"samples_ns\": [ 44072536304, 44066719929, 44068130329 ],\n    \"samples_ts\": [ 11.6172, 11.6187, 11.6184 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:40:30Z",
+          "avg_ns": 3246810670,
+          "stddev_ns": 401638,
+          "avg_ts": 39.423303,
+          "stddev_ts": 0.004877,
+          "samples_ns": [
+            3246367772,
+            3247151259,
+            3246912979
+          ],
+          "samples_ts": [
+            39.4287,
+            39.4192,
+            39.4221
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:40:43Z",
+          "avg_ns": 44069128854,
+          "stddev_ns": 3034030,
+          "avg_ts": 11.61811,
+          "stddev_ts": 0.0008,
+          "samples_ns": [
+            44072536304,
+            44066719929,
+            44068130329
+          ],
+          "samples_ts": [
+            11.6172,
+            11.6187,
+            11.6184
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 625
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:44:22.462043+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:42:57Z\",\n    \"avg_ns\": 13065897473,\n    \"stddev_ns\": 409820,\n    \"avg_ts\": 39.185980,\n    \"stddev_ts\": 0.001129,\n    \"samples_ns\": [ 13066237011, 13065962985, 13065492425 ],\n    \"samples_ts\": [ 39.185, 39.1858, 39.1872 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:43:49Z\",\n    \"avg_ns\": 10903833253,\n    \"stddev_ns\": 5244701,\n    \"avg_ts\": 11.738993,\n    \"stddev_ts\": 0.005646,\n    \"samples_ns\": [ 10908021181, 10905526365, 10897952214 ],\n    \"samples_ts\": [ 11.7345, 11.7372, 11.7453 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:42:57Z",
+          "avg_ns": 13065897473,
+          "stddev_ns": 409820,
+          "avg_ts": 39.18598,
+          "stddev_ts": 0.001129,
+          "samples_ns": [
+            13066237011,
+            13065962985,
+            13065492425
+          ],
+          "samples_ts": [
+            39.185,
+            39.1858,
+            39.1872
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:43:49Z",
+          "avg_ns": 10903833253,
+          "stddev_ns": 5244701,
+          "avg_ts": 11.738993,
+          "stddev_ts": 0.005646,
+          "samples_ns": [
+            10908021181,
+            10905526365,
+            10897952214
+          ],
+          "samples_ts": [
+            11.7345,
+            11.7372,
+            11.7453
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 626
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:47:28.004756+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:44:23Z\",\n    \"avg_ns\": 13075853783,\n    \"stddev_ns\": 688801,\n    \"avg_ts\": 39.156143,\n    \"stddev_ts\": 0.002034,\n    \"samples_ns\": [ 13076440158, 13075109492, 13076011700 ],\n    \"samples_ts\": [ 39.1544, 39.1584, 39.1557 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:45:15Z\",\n    \"avg_ns\": 44050367800,\n    \"stddev_ns\": 965148,\n    \"avg_ts\": 11.623058,\n    \"stddev_ts\": 0.000255,\n    \"samples_ns\": [ 44049255420, 44050982894, 44050865086 ],\n    \"samples_ts\": [ 11.6234, 11.6229, 11.6229 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:44:23Z",
+          "avg_ns": 13075853783,
+          "stddev_ns": 688801,
+          "avg_ts": 39.156143,
+          "stddev_ts": 0.002034,
+          "samples_ns": [
+            13076440158,
+            13075109492,
+            13076011700
+          ],
+          "samples_ts": [
+            39.1544,
+            39.1584,
+            39.1557
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:45:15Z",
+          "avg_ns": 44050367800,
+          "stddev_ns": 965148,
+          "avg_ts": 11.623058,
+          "stddev_ts": 0.000255,
+          "samples_ns": [
+            44049255420,
+            44050982894,
+            44050865086
+          ],
+          "samples_ts": [
+            11.6234,
+            11.6229,
+            11.6229
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 627
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:48:15.101813+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:47:28Z\",\n    \"avg_ns\": 3243301465,\n    \"stddev_ns\": 343637,\n    \"avg_ts\": 39.465958,\n    \"stddev_ts\": 0.004182,\n    \"samples_ns\": [ 3243643606, 3242956351, 3243304438 ],\n    \"samples_ts\": [ 39.4618, 39.4702, 39.4659 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:47:41Z\",\n    \"avg_ns\": 10988299830,\n    \"stddev_ns\": 24908433,\n    \"avg_ts\": 11.648794,\n    \"stddev_ts\": 0.026440,\n    \"samples_ns\": [ 10959545243, 11003214092, 11002140157 ],\n    \"samples_ts\": [ 11.6793, 11.633, 11.6341 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:47:28Z",
+          "avg_ns": 3243301465,
+          "stddev_ns": 343637,
+          "avg_ts": 39.465958,
+          "stddev_ts": 0.004182,
+          "samples_ns": [
+            3243643606,
+            3242956351,
+            3243304438
+          ],
+          "samples_ts": [
+            39.4618,
+            39.4702,
+            39.4659
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:47:41Z",
+          "avg_ns": 10988299830,
+          "stddev_ns": 24908433,
+          "avg_ts": 11.648794,
+          "stddev_ts": 0.02644,
+          "samples_ns": [
+            10959545243,
+            11003214092,
+            11002140157
+          ],
+          "samples_ts": [
+            11.6793,
+            11.633,
+            11.6341
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 628
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:50:41.684106+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:48:15Z\",\n    \"avg_ns\": 3244831593,\n    \"stddev_ns\": 113519,\n    \"avg_ts\": 39.447348,\n    \"stddev_ts\": 0.001194,\n    \"samples_ns\": [ 3244944349, 3244764895, 3244785536 ],\n    \"samples_ts\": [ 39.446, 39.4482, 39.4479 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:48:28Z\",\n    \"avg_ns\": 44176559683,\n    \"stddev_ns\": 14481265,\n    \"avg_ts\": 11.589858,\n    \"stddev_ts\": 0.003799,\n    \"samples_ns\": [ 44189432221, 44179361490, 44160885340 ],\n    \"samples_ts\": [ 11.5865, 11.5891, 11.594 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:48:15Z",
+          "avg_ns": 3244831593,
+          "stddev_ns": 113519,
+          "avg_ts": 39.447348,
+          "stddev_ts": 0.001194,
+          "samples_ns": [
+            3244944349,
+            3244764895,
+            3244785536
+          ],
+          "samples_ts": [
+            39.446,
+            39.4482,
+            39.4479
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:48:28Z",
+          "avg_ns": 44176559683,
+          "stddev_ns": 14481265,
+          "avg_ts": 11.589858,
+          "stddev_ts": 0.003799,
+          "samples_ns": [
+            44189432221,
+            44179361490,
+            44160885340
+          ],
+          "samples_ts": [
+            11.5865,
+            11.5891,
+            11.594
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 629
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:52:07.808789+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:50:42Z\",\n    \"avg_ns\": 13078923300,\n    \"stddev_ns\": 1041923,\n    \"avg_ts\": 39.146953,\n    \"stddev_ts\": 0.003100,\n    \"samples_ns\": [ 13078506802, 13080102334, 13078160765 ],\n    \"samples_ts\": [ 39.1482, 39.1434, 39.1492 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:51:34Z\",\n    \"avg_ns\": 10914142969,\n    \"stddev_ns\": 1325833,\n    \"avg_ts\": 11.727902,\n    \"stddev_ts\": 0.001425,\n    \"samples_ns\": [ 10913784748, 10913033052, 10915611107 ],\n    \"samples_ts\": [ 11.7283, 11.7291, 11.7263 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:50:42Z",
+          "avg_ns": 13078923300,
+          "stddev_ns": 1041923,
+          "avg_ts": 39.146953,
+          "stddev_ts": 0.0031,
+          "samples_ns": [
+            13078506802,
+            13080102334,
+            13078160765
+          ],
+          "samples_ts": [
+            39.1482,
+            39.1434,
+            39.1492
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:51:34Z",
+          "avg_ns": 10914142969,
+          "stddev_ns": 1325833,
+          "avg_ts": 11.727902,
+          "stddev_ts": 0.001425,
+          "samples_ns": [
+            10913784748,
+            10913033052,
+            10915611107
+          ],
+          "samples_ts": [
+            11.7283,
+            11.7291,
+            11.7263
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 630
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:55:13.677428+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:52:08Z\",\n    \"avg_ns\": 13152663707,\n    \"stddev_ns\": 553756,\n    \"avg_ts\": 38.927476,\n    \"stddev_ts\": 0.001639,\n    \"samples_ns\": [ 13152076428, 13152738318, 13153176375 ],\n    \"samples_ts\": [ 38.9292, 38.9273, 38.926 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:53:01Z\",\n    \"avg_ns\": 44060142512,\n    \"stddev_ns\": 7879969,\n    \"avg_ts\": 11.620480,\n    \"stddev_ts\": 0.002077,\n    \"samples_ns\": [ 44051229419, 44063042344, 44066155775 ],\n    \"samples_ts\": [ 11.6228, 11.6197, 11.6189 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:52:08Z",
+          "avg_ns": 13152663707,
+          "stddev_ns": 553756,
+          "avg_ts": 38.927476,
+          "stddev_ts": 0.001639,
+          "samples_ns": [
+            13152076428,
+            13152738318,
+            13153176375
+          ],
+          "samples_ts": [
+            38.9292,
+            38.9273,
+            38.926
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:53:01Z",
+          "avg_ns": 44060142512,
+          "stddev_ns": 7879969,
+          "avg_ts": 11.62048,
+          "stddev_ts": 0.002077,
+          "samples_ns": [
+            44051229419,
+            44063042344,
+            44066155775
+          ],
+          "samples_ts": [
+            11.6228,
+            11.6197,
+            11.6189
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 631
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:56:00.667770+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:55:14Z\",\n    \"avg_ns\": 3246220793,\n    \"stddev_ns\": 316299,\n    \"avg_ts\": 39.430467,\n    \"stddev_ts\": 0.003715,\n    \"samples_ns\": [ 3246267697, 3246500497, 3245894187 ],\n    \"samples_ts\": [ 39.4299, 39.4271, 39.4344 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:55:27Z\",\n    \"avg_ns\": 10978083848,\n    \"stddev_ns\": 1855686,\n    \"avg_ts\": 11.659594,\n    \"stddev_ts\": 0.001968,\n    \"samples_ns\": [ 10979043450, 10975948159, 10979259936 ],\n    \"samples_ts\": [ 11.6586, 11.6619, 11.6583 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:55:14Z",
+          "avg_ns": 3246220793,
+          "stddev_ns": 316299,
+          "avg_ts": 39.430467,
+          "stddev_ts": 0.003715,
+          "samples_ns": [
+            3246267697,
+            3246500497,
+            3245894187
+          ],
+          "samples_ts": [
+            39.4299,
+            39.4271,
+            39.4344
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:55:27Z",
+          "avg_ns": 10978083848,
+          "stddev_ns": 1855686,
+          "avg_ts": 11.659594,
+          "stddev_ts": 0.001968,
+          "samples_ns": [
+            10979043450,
+            10975948159,
+            10979259936
+          ],
+          "samples_ts": [
+            11.6586,
+            11.6619,
+            11.6583
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 632
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:58:27.946280+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:56:01Z\",\n    \"avg_ns\": 3242090190,\n    \"stddev_ns\": 626702,\n    \"avg_ts\": 39.480704,\n    \"stddev_ts\": 0.007601,\n    \"samples_ns\": [ 3241407690, 3242231043, 3242631838 ],\n    \"samples_ts\": [ 39.489, 39.479, 39.4741 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:56:14Z\",\n    \"avg_ns\": 44412905771,\n    \"stddev_ns\": 1775307,\n    \"avg_ts\": 11.528181,\n    \"stddev_ts\": 0.000458,\n    \"samples_ns\": [ 44411561442, 44414901525, 44412254347 ],\n    \"samples_ts\": [ 11.5285, 11.5277, 11.5283 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:56:01Z",
+          "avg_ns": 3242090190,
+          "stddev_ns": 626702,
+          "avg_ts": 39.480704,
+          "stddev_ts": 0.007601,
+          "samples_ns": [
+            3241407690,
+            3242231043,
+            3242631838
+          ],
+          "samples_ts": [
+            39.489,
+            39.479,
+            39.4741
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:56:14Z",
+          "avg_ns": 44412905771,
+          "stddev_ns": 1775307,
+          "avg_ts": 11.528181,
+          "stddev_ts": 0.000458,
+          "samples_ns": [
+            44411561442,
+            44414901525,
+            44412254347
+          ],
+          "samples_ts": [
+            11.5285,
+            11.5277,
+            11.5283
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 633
+    },
+    {
+      "timestamp_utc": "2025-12-09T08:59:55.741055+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:58:28Z\",\n    \"avg_ns\": 13496720361,\n    \"stddev_ns\": 2720257,\n    \"avg_ts\": 37.935143,\n    \"stddev_ts\": 0.007631,\n    \"samples_ns\": [ 13499807752, 13495649768, 13494703565 ],\n    \"samples_ts\": [ 37.9265, 37.9382, 37.9408 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:59:22Z\",\n    \"avg_ns\": 10907698514,\n    \"stddev_ns\": 2945071,\n    \"avg_ts\": 11.734832,\n    \"stddev_ts\": 0.003164,\n    \"samples_ns\": [ 10905884659, 10906118662, 10911092223 ],\n    \"samples_ts\": [ 11.7368, 11.7365, 11.7312 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:58:28Z",
+          "avg_ns": 13496720361,
+          "stddev_ns": 2720257,
+          "avg_ts": 37.935143,
+          "stddev_ts": 0.007631,
+          "samples_ns": [
+            13499807752,
+            13495649768,
+            13494703565
+          ],
+          "samples_ts": [
+            37.9265,
+            37.9382,
+            37.9408
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:59:22Z",
+          "avg_ns": 10907698514,
+          "stddev_ns": 2945071,
+          "avg_ts": 11.734832,
+          "stddev_ts": 0.003164,
+          "samples_ns": [
+            10905884659,
+            10906118662,
+            10911092223
+          ],
+          "samples_ts": [
+            11.7368,
+            11.7365,
+            11.7312
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 634
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:03:03.027466+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T08:59:56Z\",\n    \"avg_ns\": 13516548650,\n    \"stddev_ns\": 620540,\n    \"avg_ts\": 37.879492,\n    \"stddev_ts\": 0.001708,\n    \"samples_ns\": [ 13516118032, 13517246121, 13516281798 ],\n    \"samples_ts\": [ 37.8807, 37.8775, 37.8802 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:00:50Z\",\n    \"avg_ns\": 44049700287,\n    \"stddev_ns\": 5235641,\n    \"avg_ts\": 11.623235,\n    \"stddev_ts\": 0.001379,\n    \"samples_ns\": [ 44055651298, 44045851310, 44047598255 ],\n    \"samples_ts\": [ 11.6217, 11.6243, 11.6238 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T08:59:56Z",
+          "avg_ns": 13516548650,
+          "stddev_ns": 620540,
+          "avg_ts": 37.879492,
+          "stddev_ts": 0.001708,
+          "samples_ns": [
+            13516118032,
+            13517246121,
+            13516281798
+          ],
+          "samples_ts": [
+            37.8807,
+            37.8775,
+            37.8802
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:00:50Z",
+          "avg_ns": 44049700287,
+          "stddev_ns": 5235641,
+          "avg_ts": 11.623235,
+          "stddev_ts": 0.001379,
+          "samples_ns": [
+            44055651298,
+            44045851310,
+            44047598255
+          ],
+          "samples_ts": [
+            11.6217,
+            11.6243,
+            11.6238
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 635
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:03:49.792565+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:03:03Z\",\n    \"avg_ns\": 3244128190,\n    \"stddev_ns\": 268380,\n    \"avg_ts\": 39.455901,\n    \"stddev_ts\": 0.003264,\n    \"samples_ns\": [ 3244267212, 3243818819, 3244298539 ],\n    \"samples_ts\": [ 39.4542, 39.4597, 39.4538 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:03:16Z\",\n    \"avg_ns\": 10914802198,\n    \"stddev_ns\": 1212761,\n    \"avg_ts\": 11.727194,\n    \"stddev_ts\": 0.001293,\n    \"samples_ns\": [ 10915518530, 10913412474, 10915475592 ],\n    \"samples_ts\": [ 11.7264, 11.7287, 11.7265 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:03:03Z",
+          "avg_ns": 3244128190,
+          "stddev_ns": 268380,
+          "avg_ts": 39.455901,
+          "stddev_ts": 0.003264,
+          "samples_ns": [
+            3244267212,
+            3243818819,
+            3244298539
+          ],
+          "samples_ts": [
+            39.4542,
+            39.4597,
+            39.4538
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:03:16Z",
+          "avg_ns": 10914802198,
+          "stddev_ns": 1212761,
+          "avg_ts": 11.727194,
+          "stddev_ts": 0.001293,
+          "samples_ns": [
+            10915518530,
+            10913412474,
+            10915475592
+          ],
+          "samples_ts": [
+            11.7264,
+            11.7287,
+            11.7265
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 636
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:06:15.923837+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:03:50Z\",\n    \"avg_ns\": 3247609957,\n    \"stddev_ns\": 366688,\n    \"avg_ts\": 39.413600,\n    \"stddev_ts\": 0.004341,\n    \"samples_ns\": [ 3247320415, 3247499603, 3248009855 ],\n    \"samples_ts\": [ 39.4171, 39.4149, 39.4087 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:04:03Z\",\n    \"avg_ns\": 44015382583,\n    \"stddev_ns\": 594827,\n    \"avg_ts\": 11.632297,\n    \"stddev_ts\": 0.000147,\n    \"samples_ns\": [ 44015099468, 44015024453, 44016023829 ],\n    \"samples_ts\": [ 11.6324, 11.6324, 11.6321 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:03:50Z",
+          "avg_ns": 3247609957,
+          "stddev_ns": 366688,
+          "avg_ts": 39.4136,
+          "stddev_ts": 0.004341,
+          "samples_ns": [
+            3247320415,
+            3247499603,
+            3248009855
+          ],
+          "samples_ts": [
+            39.4171,
+            39.4149,
+            39.4087
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:04:03Z",
+          "avg_ns": 44015382583,
+          "stddev_ns": 594827,
+          "avg_ts": 11.632297,
+          "stddev_ts": 0.000147,
+          "samples_ns": [
+            44015099468,
+            44015024453,
+            44016023829
+          ],
+          "samples_ts": [
+            11.6324,
+            11.6324,
+            11.6321
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 637
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:07:42.013812+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:06:16Z\",\n    \"avg_ns\": 13062590838,\n    \"stddev_ns\": 1516214,\n    \"avg_ts\": 39.195900,\n    \"stddev_ts\": 0.004550,\n    \"samples_ns\": [ 13064146172, 13061117033, 13062509309 ],\n    \"samples_ts\": [ 39.1912, 39.2003, 39.1961 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:07:09Z\",\n    \"avg_ns\": 10921865597,\n    \"stddev_ns\": 1880108,\n    \"avg_ts\": 11.719610,\n    \"stddev_ts\": 0.002014,\n    \"samples_ns\": [ 10919717856, 10923193025, 10922685911 ],\n    \"samples_ts\": [ 11.7219, 11.7182, 11.7187 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:06:16Z",
+          "avg_ns": 13062590838,
+          "stddev_ns": 1516214,
+          "avg_ts": 39.1959,
+          "stddev_ts": 0.00455,
+          "samples_ns": [
+            13064146172,
+            13061117033,
+            13062509309
+          ],
+          "samples_ts": [
+            39.1912,
+            39.2003,
+            39.1961
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:07:09Z",
+          "avg_ns": 10921865597,
+          "stddev_ns": 1880108,
+          "avg_ts": 11.71961,
+          "stddev_ts": 0.002014,
+          "samples_ns": [
+            10919717856,
+            10923193025,
+            10922685911
+          ],
+          "samples_ts": [
+            11.7219,
+            11.7182,
+            11.7187
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 638
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:10:49.040886+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:07:42Z\",\n    \"avg_ns\": 13166128691,\n    \"stddev_ns\": 129705443,\n    \"avg_ts\": 38.890169,\n    \"stddev_ts\": 0.381291,\n    \"samples_ns\": [ 13313302178, 13116592925, 13068490972 ],\n    \"samples_ts\": [ 38.4578, 39.0345, 39.1782 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:08:35Z\",\n    \"avg_ns\": 44385269541,\n    \"stddev_ns\": 5195581,\n    \"avg_ts\": 11.535359,\n    \"stddev_ts\": 0.001349,\n    \"samples_ns\": [ 44381811756, 44382757860, 44391239008 ],\n    \"samples_ts\": [ 11.5363, 11.536, 11.5338 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:07:42Z",
+          "avg_ns": 13166128691,
+          "stddev_ns": 129705443,
+          "avg_ts": 38.890169,
+          "stddev_ts": 0.381291,
+          "samples_ns": [
+            13313302178,
+            13116592925,
+            13068490972
+          ],
+          "samples_ts": [
+            38.4578,
+            39.0345,
+            39.1782
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:08:35Z",
+          "avg_ns": 44385269541,
+          "stddev_ns": 5195581,
+          "avg_ts": 11.535359,
+          "stddev_ts": 0.001349,
+          "samples_ns": [
+            44381811756,
+            44382757860,
+            44391239008
+          ],
+          "samples_ts": [
+            11.5363,
+            11.536,
+            11.5338
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 639
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:11:35.807518+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:10:49Z\",\n    \"avg_ns\": 3244987390,\n    \"stddev_ns\": 316786,\n    \"avg_ts\": 39.445454,\n    \"stddev_ts\": 0.003851,\n    \"samples_ns\": [ 3244692487, 3245322265, 3244947418 ],\n    \"samples_ts\": [ 39.449, 39.4414, 39.4459 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:11:02Z\",\n    \"avg_ns\": 10906837667,\n    \"stddev_ns\": 1280982,\n    \"avg_ts\": 11.735757,\n    \"stddev_ts\": 0.001369,\n    \"samples_ns\": [ 10907917360, 10907160840, 10905434803 ],\n    \"samples_ts\": [ 11.7346, 11.7354, 11.7373 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:10:49Z",
+          "avg_ns": 3244987390,
+          "stddev_ns": 316786,
+          "avg_ts": 39.445454,
+          "stddev_ts": 0.003851,
+          "samples_ns": [
+            3244692487,
+            3245322265,
+            3244947418
+          ],
+          "samples_ts": [
+            39.449,
+            39.4414,
+            39.4459
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:11:02Z",
+          "avg_ns": 10906837667,
+          "stddev_ns": 1280982,
+          "avg_ts": 11.735757,
+          "stddev_ts": 0.001369,
+          "samples_ns": [
+            10907917360,
+            10907160840,
+            10905434803
+          ],
+          "samples_ts": [
+            11.7346,
+            11.7354,
+            11.7373
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 640
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:14:02.188717+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:11:36Z\",\n    \"avg_ns\": 3244526844,\n    \"stddev_ns\": 316480,\n    \"avg_ts\": 39.451053,\n    \"stddev_ts\": 0.003721,\n    \"samples_ns\": [ 3244528657, 3244831991, 3244219886 ],\n    \"samples_ts\": [ 39.451, 39.4473, 39.4548 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:11:49Z\",\n    \"avg_ns\": 44102325520,\n    \"stddev_ns\": 22706293,\n    \"avg_ts\": 11.609367,\n    \"stddev_ts\": 0.005978,\n    \"samples_ns\": [ 44105168700, 44123476325, 44078331535 ],\n    \"samples_ts\": [ 11.6086, 11.6038, 11.6157 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:11:36Z",
+          "avg_ns": 3244526844,
+          "stddev_ns": 316480,
+          "avg_ts": 39.451053,
+          "stddev_ts": 0.003721,
+          "samples_ns": [
+            3244528657,
+            3244831991,
+            3244219886
+          ],
+          "samples_ts": [
+            39.451,
+            39.4473,
+            39.4548
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:11:49Z",
+          "avg_ns": 44102325520,
+          "stddev_ns": 22706293,
+          "avg_ts": 11.609367,
+          "stddev_ts": 0.005978,
+          "samples_ns": [
+            44105168700,
+            44123476325,
+            44078331535
+          ],
+          "samples_ts": [
+            11.6086,
+            11.6038,
+            11.6157
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 641
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:15:28.329072+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:14:03Z\",\n    \"avg_ns\": 13074068898,\n    \"stddev_ns\": 671662,\n    \"avg_ts\": 39.161489,\n    \"stddev_ts\": 0.001953,\n    \"samples_ns\": [ 13073741142, 13074819645, 13073645909 ],\n    \"samples_ts\": [ 39.1625, 39.1592, 39.1628 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:14:55Z\",\n    \"avg_ns\": 10916274677,\n    \"stddev_ns\": 2005368,\n    \"avg_ts\": 11.725612,\n    \"stddev_ts\": 0.002148,\n    \"samples_ns\": [ 10915858986, 10918449773, 10914515274 ],\n    \"samples_ts\": [ 11.7261, 11.7233, 11.7275 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:14:03Z",
+          "avg_ns": 13074068898,
+          "stddev_ns": 671662,
+          "avg_ts": 39.161489,
+          "stddev_ts": 0.001953,
+          "samples_ns": [
+            13073741142,
+            13074819645,
+            13073645909
+          ],
+          "samples_ts": [
+            39.1625,
+            39.1592,
+            39.1628
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:14:55Z",
+          "avg_ns": 10916274677,
+          "stddev_ns": 2005368,
+          "avg_ts": 11.725612,
+          "stddev_ts": 0.002148,
+          "samples_ns": [
+            10915858986,
+            10918449773,
+            10914515274
+          ],
+          "samples_ts": [
+            11.7261,
+            11.7233,
+            11.7275
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 642
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:18:33.748224+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:15:29Z\",\n    \"avg_ns\": 13082197992,\n    \"stddev_ns\": 558457,\n    \"avg_ts\": 39.137154,\n    \"stddev_ts\": 0.001599,\n    \"samples_ns\": [ 13082089485, 13082778440, 13081726053 ],\n    \"samples_ts\": [ 39.1375, 39.1354, 39.1386 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:16:21Z\",\n    \"avg_ns\": 44007551935,\n    \"stddev_ns\": 3664924,\n    \"avg_ts\": 11.634367,\n    \"stddev_ts\": 0.000969,\n    \"samples_ns\": [ 44010903167, 44003638327, 44008114311 ],\n    \"samples_ts\": [ 11.6335, 11.6354, 11.6342 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:15:29Z",
+          "avg_ns": 13082197992,
+          "stddev_ns": 558457,
+          "avg_ts": 39.137154,
+          "stddev_ts": 0.001599,
+          "samples_ns": [
+            13082089485,
+            13082778440,
+            13081726053
+          ],
+          "samples_ts": [
+            39.1375,
+            39.1354,
+            39.1386
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:16:21Z",
+          "avg_ns": 44007551935,
+          "stddev_ns": 3664924,
+          "avg_ts": 11.634367,
+          "stddev_ts": 0.000969,
+          "samples_ns": [
+            44010903167,
+            44003638327,
+            44008114311
+          ],
+          "samples_ts": [
+            11.6335,
+            11.6354,
+            11.6342
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 643
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:19:20.591922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:18:34Z\",\n    \"avg_ns\": 3244965263,\n    \"stddev_ns\": 459161,\n    \"avg_ts\": 39.445723,\n    \"stddev_ts\": 0.005581,\n    \"samples_ns\": [ 3244868183, 3245465202, 3244562404 ],\n    \"samples_ts\": [ 39.4469, 39.4396, 39.4506 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:18:47Z\",\n    \"avg_ns\": 10919375595,\n    \"stddev_ns\": 3033078,\n    \"avg_ts\": 11.722283,\n    \"stddev_ts\": 0.003253,\n    \"samples_ns\": [ 10920079175, 10921991374, 10916056238 ],\n    \"samples_ts\": [ 11.7215, 11.7195, 11.7258 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:18:34Z",
+          "avg_ns": 3244965263,
+          "stddev_ns": 459161,
+          "avg_ts": 39.445723,
+          "stddev_ts": 0.005581,
+          "samples_ns": [
+            3244868183,
+            3245465202,
+            3244562404
+          ],
+          "samples_ts": [
+            39.4469,
+            39.4396,
+            39.4506
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:18:47Z",
+          "avg_ns": 10919375595,
+          "stddev_ns": 3033078,
+          "avg_ts": 11.722283,
+          "stddev_ts": 0.003253,
+          "samples_ns": [
+            10920079175,
+            10921991374,
+            10916056238
+          ],
+          "samples_ts": [
+            11.7215,
+            11.7195,
+            11.7258
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 644
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:21:47.838154+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:19:21Z\",\n    \"avg_ns\": 3245884271,\n    \"stddev_ns\": 285394,\n    \"avg_ts\": 39.434555,\n    \"stddev_ts\": 0.003326,\n    \"samples_ns\": [ 3246179725, 3245833956, 3245639134 ],\n    \"samples_ts\": [ 39.431, 39.4352, 39.4375 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:19:34Z\",\n    \"avg_ns\": 44399322526,\n    \"stddev_ns\": 7315330,\n    \"avg_ts\": 11.531708,\n    \"stddev_ts\": 0.001899,\n    \"samples_ns\": [ 44403127533, 44390892305, 44403947741 ],\n    \"samples_ts\": [ 11.5307, 11.5339, 11.5305 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:19:21Z",
+          "avg_ns": 3245884271,
+          "stddev_ns": 285394,
+          "avg_ts": 39.434555,
+          "stddev_ts": 0.003326,
+          "samples_ns": [
+            3246179725,
+            3245833956,
+            3245639134
+          ],
+          "samples_ts": [
+            39.431,
+            39.4352,
+            39.4375
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:19:34Z",
+          "avg_ns": 44399322526,
+          "stddev_ns": 7315330,
+          "avg_ts": 11.531708,
+          "stddev_ts": 0.001899,
+          "samples_ns": [
+            44403127533,
+            44390892305,
+            44403947741
+          ],
+          "samples_ts": [
+            11.5307,
+            11.5339,
+            11.5305
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 645
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:23:15.686143+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:21:48Z\",\n    \"avg_ns\": 13504038436,\n    \"stddev_ns\": 341190,\n    \"avg_ts\": 37.914584,\n    \"stddev_ts\": 0.000840,\n    \"samples_ns\": [ 13504121704, 13504286980, 13503706626 ],\n    \"samples_ts\": [ 37.9144, 37.9139, 37.9155 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:22:42Z\",\n    \"avg_ns\": 10921457070,\n    \"stddev_ns\": 2295922,\n    \"avg_ts\": 11.720048,\n    \"stddev_ts\": 0.002461,\n    \"samples_ns\": [ 10919408941, 10923935168, 10921027102 ],\n    \"samples_ts\": [ 11.7222, 11.7174, 11.7205 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:21:48Z",
+          "avg_ns": 13504038436,
+          "stddev_ns": 341190,
+          "avg_ts": 37.914584,
+          "stddev_ts": 0.00084,
+          "samples_ns": [
+            13504121704,
+            13504286980,
+            13503706626
+          ],
+          "samples_ts": [
+            37.9144,
+            37.9139,
+            37.9155
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:22:42Z",
+          "avg_ns": 10921457070,
+          "stddev_ns": 2295922,
+          "avg_ts": 11.720048,
+          "stddev_ts": 0.002461,
+          "samples_ns": [
+            10919408941,
+            10923935168,
+            10921027102
+          ],
+          "samples_ts": [
+            11.7222,
+            11.7174,
+            11.7205
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 646
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:26:24.145185+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:23:16Z\",\n    \"avg_ns\": 13500452506,\n    \"stddev_ns\": 919000,\n    \"avg_ts\": 37.924655,\n    \"stddev_ts\": 0.002582,\n    \"samples_ns\": [ 13499817742, 13501506344, 13500033432 ],\n    \"samples_ts\": [ 37.9264, 37.9217, 37.9258 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:24:10Z\",\n    \"avg_ns\": 44446786940,\n    \"stddev_ns\": 4466344,\n    \"avg_ts\": 11.519393,\n    \"stddev_ts\": 0.001157,\n    \"samples_ns\": [ 44444872517, 44451891376, 44443596927 ],\n    \"samples_ts\": [ 11.5199, 11.5181, 11.5202 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:23:16Z",
+          "avg_ns": 13500452506,
+          "stddev_ns": 919000,
+          "avg_ts": 37.924655,
+          "stddev_ts": 0.002582,
+          "samples_ns": [
+            13499817742,
+            13501506344,
+            13500033432
+          ],
+          "samples_ts": [
+            37.9264,
+            37.9217,
+            37.9258
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:24:10Z",
+          "avg_ns": 44446786940,
+          "stddev_ns": 4466344,
+          "avg_ts": 11.519393,
+          "stddev_ts": 0.001157,
+          "samples_ns": [
+            44444872517,
+            44451891376,
+            44443596927
+          ],
+          "samples_ts": [
+            11.5199,
+            11.5181,
+            11.5202
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 647
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:27:09.212725+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:26:25Z\",\n    \"avg_ns\": 2253946075,\n    \"stddev_ns\": 1024502,\n    \"avg_ts\": 56.789299,\n    \"stddev_ts\": 0.025756,\n    \"samples_ns\": [ 2253890975, 2252952440, 2254994812 ],\n    \"samples_ts\": [ 56.7907, 56.8143, 56.7629 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:26:34Z\",\n    \"avg_ns\": 11660510557,\n    \"stddev_ns\": 1466421,\n    \"avg_ts\": 10.977221,\n    \"stddev_ts\": 0.001380,\n    \"samples_ns\": [ 11659435977, 11659914555, 11662181139 ],\n    \"samples_ts\": [ 10.9782, 10.9778, 10.9756 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:26:25Z",
+          "avg_ns": 2253946075,
+          "stddev_ns": 1024502,
+          "avg_ts": 56.789299,
+          "stddev_ts": 0.025756,
+          "samples_ns": [
+            2253890975,
+            2252952440,
+            2254994812
+          ],
+          "samples_ts": [
+            56.7907,
+            56.8143,
+            56.7629
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:26:34Z",
+          "avg_ns": 11660510557,
+          "stddev_ns": 1466421,
+          "avg_ts": 10.977221,
+          "stddev_ts": 0.00138,
+          "samples_ns": [
+            11659435977,
+            11659914555,
+            11662181139
+          ],
+          "samples_ts": [
+            10.9782,
+            10.9778,
+            10.9756
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 648
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:29:40.300875+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:27:10Z\",\n    \"avg_ns\": 2258811828,\n    \"stddev_ns\": 496582,\n    \"avg_ts\": 56.666962,\n    \"stddev_ts\": 0.012344,\n    \"samples_ns\": [ 2258256302, 2259192642, 2258986542 ],\n    \"samples_ts\": [ 56.6809, 56.6574, 56.6626 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:27:19Z\",\n    \"avg_ns\": 46994651182,\n    \"stddev_ns\": 4096619,\n    \"avg_ts\": 10.894857,\n    \"stddev_ts\": 0.000947,\n    \"samples_ns\": [ 46998994940, 46990886466, 46994072142 ],\n    \"samples_ts\": [ 10.8938, 10.8957, 10.895 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:27:10Z",
+          "avg_ns": 2258811828,
+          "stddev_ns": 496582,
+          "avg_ts": 56.666962,
+          "stddev_ts": 0.012344,
+          "samples_ns": [
+            2258256302,
+            2259192642,
+            2258986542
+          ],
+          "samples_ts": [
+            56.6809,
+            56.6574,
+            56.6626
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:27:19Z",
+          "avg_ns": 46994651182,
+          "stddev_ns": 4096619,
+          "avg_ts": 10.894857,
+          "stddev_ts": 0.000947,
+          "samples_ns": [
+            46998994940,
+            46990886466,
+            46994072142
+          ],
+          "samples_ts": [
+            10.8938,
+            10.8957,
+            10.895
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 649
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:30:52.756800+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:29:41Z\",\n    \"avg_ns\": 9068712258,\n    \"stddev_ns\": 2105710,\n    \"avg_ts\": 56.457852,\n    \"stddev_ts\": 0.013082,\n    \"samples_ns\": [ 9068567677, 9070882215, 9066686884 ],\n    \"samples_ts\": [ 56.4588, 56.4443, 56.4705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:30:17Z\",\n    \"avg_ns\": 11694934021,\n    \"stddev_ns\": 2532102,\n    \"avg_ts\": 10.944910,\n    \"stddev_ts\": 0.002367,\n    \"samples_ns\": [ 11694021971, 11692986721, 11697793372 ],\n    \"samples_ts\": [ 10.9458, 10.9467, 10.9422 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:29:41Z",
+          "avg_ns": 9068712258,
+          "stddev_ns": 2105710,
+          "avg_ts": 56.457852,
+          "stddev_ts": 0.013082,
+          "samples_ns": [
+            9068567677,
+            9070882215,
+            9066686884
+          ],
+          "samples_ts": [
+            56.4588,
+            56.4443,
+            56.4705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:30:17Z",
+          "avg_ns": 11694934021,
+          "stddev_ns": 2532102,
+          "avg_ts": 10.94491,
+          "stddev_ts": 0.002367,
+          "samples_ns": [
+            11694021971,
+            11692986721,
+            11697793372
+          ],
+          "samples_ts": [
+            10.9458,
+            10.9467,
+            10.9422
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 650
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:33:51.367079+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:30:53Z\",\n    \"avg_ns\": 9077567783,\n    \"stddev_ns\": 678920,\n    \"avg_ts\": 56.402774,\n    \"stddev_ts\": 0.004218,\n    \"samples_ns\": [ 9078277867, 9077500437, 9076925045 ],\n    \"samples_ts\": [ 56.3984, 56.4032, 56.4068 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:31:29Z\",\n    \"avg_ns\": 47079245562,\n    \"stddev_ns\": 9430764,\n    \"avg_ts\": 10.875281,\n    \"stddev_ts\": 0.002178,\n    \"samples_ns\": [ 47085358969, 47083987248, 47068390471 ],\n    \"samples_ts\": [ 10.8739, 10.8742, 10.8778 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:30:53Z",
+          "avg_ns": 9077567783,
+          "stddev_ns": 678920,
+          "avg_ts": 56.402774,
+          "stddev_ts": 0.004218,
+          "samples_ns": [
+            9078277867,
+            9077500437,
+            9076925045
+          ],
+          "samples_ts": [
+            56.3984,
+            56.4032,
+            56.4068
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:31:29Z",
+          "avg_ns": 47079245562,
+          "stddev_ns": 9430764,
+          "avg_ts": 10.875281,
+          "stddev_ts": 0.002178,
+          "samples_ns": [
+            47085358969,
+            47083987248,
+            47068390471
+          ],
+          "samples_ts": [
+            10.8739,
+            10.8742,
+            10.8778
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 651
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:34:36.500587+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:33:52Z\",\n    \"avg_ns\": 2258972555,\n    \"stddev_ns\": 1107517,\n    \"avg_ts\": 56.662937,\n    \"stddev_ts\": 0.027779,\n    \"samples_ns\": [ 2257900833, 2260112696, 2258904136 ],\n    \"samples_ts\": [ 56.6898, 56.6343, 56.6646 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:34:01Z\",\n    \"avg_ns\": 11675569045,\n    \"stddev_ns\": 2651482,\n    \"avg_ts\": 10.963063,\n    \"stddev_ts\": 0.002486,\n    \"samples_ns\": [ 11675492619, 11678253507, 11672961011 ],\n    \"samples_ts\": [ 10.9631, 10.9605, 10.9655 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:33:52Z",
+          "avg_ns": 2258972555,
+          "stddev_ns": 1107517,
+          "avg_ts": 56.662937,
+          "stddev_ts": 0.027779,
+          "samples_ns": [
+            2257900833,
+            2260112696,
+            2258904136
+          ],
+          "samples_ts": [
+            56.6898,
+            56.6343,
+            56.6646
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:34:01Z",
+          "avg_ns": 11675569045,
+          "stddev_ns": 2651482,
+          "avg_ts": 10.963063,
+          "stddev_ts": 0.002486,
+          "samples_ns": [
+            11675492619,
+            11678253507,
+            11672961011
+          ],
+          "samples_ts": [
+            10.9631,
+            10.9605,
+            10.9655
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 652
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:37:07.549755+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:34:37Z\",\n    \"avg_ns\": 2255199274,\n    \"stddev_ns\": 2183362,\n    \"avg_ts\": 56.757769,\n    \"stddev_ts\": 0.054979,\n    \"samples_ns\": [ 2252691993, 2256681454, 2256224375 ],\n    \"samples_ts\": [ 56.8209, 56.7205, 56.7319 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:34:46Z\",\n    \"avg_ns\": 46991701515,\n    \"stddev_ns\": 4287042,\n    \"avg_ts\": 10.895541,\n    \"stddev_ts\": 0.000993,\n    \"samples_ns\": [ 46994967297, 46993283083, 46986854166 ],\n    \"samples_ts\": [ 10.8948, 10.8952, 10.8967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:34:37Z",
+          "avg_ns": 2255199274,
+          "stddev_ns": 2183362,
+          "avg_ts": 56.757769,
+          "stddev_ts": 0.054979,
+          "samples_ns": [
+            2252691993,
+            2256681454,
+            2256224375
+          ],
+          "samples_ts": [
+            56.8209,
+            56.7205,
+            56.7319
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:34:46Z",
+          "avg_ns": 46991701515,
+          "stddev_ns": 4287042,
+          "avg_ts": 10.895541,
+          "stddev_ts": 0.000993,
+          "samples_ns": [
+            46994967297,
+            46993283083,
+            46986854166
+          ],
+          "samples_ts": [
+            10.8948,
+            10.8952,
+            10.8967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 653
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:38:19.863358+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:37:08Z\",\n    \"avg_ns\": 9049818173,\n    \"stddev_ns\": 1356482,\n    \"avg_ts\": 56.575723,\n    \"stddev_ts\": 0.008480,\n    \"samples_ns\": [ 9049426698, 9048700478, 9051327343 ],\n    \"samples_ts\": [ 56.5782, 56.5827, 56.5663 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:37:44Z\",\n    \"avg_ns\": 11672725405,\n    \"stddev_ns\": 5889859,\n    \"avg_ts\": 10.965736,\n    \"stddev_ts\": 0.005534,\n    \"samples_ns\": [ 11677751960, 11674179589, 11666244666 ],\n    \"samples_ts\": [ 10.961, 10.9644, 10.9718 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:37:08Z",
+          "avg_ns": 9049818173,
+          "stddev_ns": 1356482,
+          "avg_ts": 56.575723,
+          "stddev_ts": 0.00848,
+          "samples_ns": [
+            9049426698,
+            9048700478,
+            9051327343
+          ],
+          "samples_ts": [
+            56.5782,
+            56.5827,
+            56.5663
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:37:44Z",
+          "avg_ns": 11672725405,
+          "stddev_ns": 5889859,
+          "avg_ts": 10.965736,
+          "stddev_ts": 0.005534,
+          "samples_ns": [
+            11677751960,
+            11674179589,
+            11666244666
+          ],
+          "samples_ts": [
+            10.961,
+            10.9644,
+            10.9718
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 654
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:41:18.399707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:38:20Z\",\n    \"avg_ns\": 9043831769,\n    \"stddev_ns\": 1899344,\n    \"avg_ts\": 56.613173,\n    \"stddev_ts\": 0.011859,\n    \"samples_ns\": [ 9043609551, 9042058102, 9045827656 ],\n    \"samples_ts\": [ 56.6146, 56.6243, 56.6007 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:38:56Z\",\n    \"avg_ns\": 47095510261,\n    \"stddev_ns\": 1257635,\n    \"avg_ts\": 10.871525,\n    \"stddev_ts\": 0.000286,\n    \"samples_ns\": [ 47095069077, 47094552478, 47096909229 ],\n    \"samples_ts\": [ 10.8716, 10.8717, 10.8712 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:38:20Z",
+          "avg_ns": 9043831769,
+          "stddev_ns": 1899344,
+          "avg_ts": 56.613173,
+          "stddev_ts": 0.011859,
+          "samples_ns": [
+            9043609551,
+            9042058102,
+            9045827656
+          ],
+          "samples_ts": [
+            56.6146,
+            56.6243,
+            56.6007
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:38:56Z",
+          "avg_ns": 47095510261,
+          "stddev_ns": 1257635,
+          "avg_ts": 10.871525,
+          "stddev_ts": 0.000286,
+          "samples_ns": [
+            47095069077,
+            47094552478,
+            47096909229
+          ],
+          "samples_ts": [
+            10.8716,
+            10.8717,
+            10.8712
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 655
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:42:03.521015+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:41:19Z\",\n    \"avg_ns\": 2255625843,\n    \"stddev_ns\": 972411,\n    \"avg_ts\": 56.747007,\n    \"stddev_ts\": 0.024469,\n    \"samples_ns\": [ 2256354696, 2256001122, 2254521711 ],\n    \"samples_ts\": [ 56.7287, 56.7376, 56.7748 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:41:28Z\",\n    \"avg_ns\": 11676996046,\n    \"stddev_ns\": 6377441,\n    \"avg_ts\": 10.961726,\n    \"stddev_ts\": 0.005985,\n    \"samples_ns\": [ 11673024021, 11673611861, 11684352256 ],\n    \"samples_ts\": [ 10.9655, 10.9649, 10.9548 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:41:19Z",
+          "avg_ns": 2255625843,
+          "stddev_ns": 972411,
+          "avg_ts": 56.747007,
+          "stddev_ts": 0.024469,
+          "samples_ns": [
+            2256354696,
+            2256001122,
+            2254521711
+          ],
+          "samples_ts": [
+            56.7287,
+            56.7376,
+            56.7748
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:41:28Z",
+          "avg_ns": 11676996046,
+          "stddev_ns": 6377441,
+          "avg_ts": 10.961726,
+          "stddev_ts": 0.005985,
+          "samples_ns": [
+            11673024021,
+            11673611861,
+            11684352256
+          ],
+          "samples_ts": [
+            10.9655,
+            10.9649,
+            10.9548
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 656
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:44:34.640086+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:42:04Z\",\n    \"avg_ns\": 2253646556,\n    \"stddev_ns\": 1515310,\n    \"avg_ts\": 56.796856,\n    \"stddev_ts\": 0.038158,\n    \"samples_ns\": [ 2255365560, 2253065771, 2252508338 ],\n    \"samples_ts\": [ 56.7535, 56.8115, 56.8255 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:42:13Z\",\n    \"avg_ns\": 47014864051,\n    \"stddev_ns\": 5195054,\n    \"avg_ts\": 10.890173,\n    \"stddev_ts\": 0.001201,\n    \"samples_ns\": [ 47013709988, 47010352305, 47020529862 ],\n    \"samples_ts\": [ 10.8904, 10.8912, 10.8889 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:42:04Z",
+          "avg_ns": 2253646556,
+          "stddev_ns": 1515310,
+          "avg_ts": 56.796856,
+          "stddev_ts": 0.038158,
+          "samples_ns": [
+            2255365560,
+            2253065771,
+            2252508338
+          ],
+          "samples_ts": [
+            56.7535,
+            56.8115,
+            56.8255
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:42:13Z",
+          "avg_ns": 47014864051,
+          "stddev_ns": 5195054,
+          "avg_ts": 10.890173,
+          "stddev_ts": 0.001201,
+          "samples_ns": [
+            47013709988,
+            47010352305,
+            47020529862
+          ],
+          "samples_ts": [
+            10.8904,
+            10.8912,
+            10.8889
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 657
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:45:48.384046+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:44:35Z\",\n    \"avg_ns\": 9393076613,\n    \"stddev_ns\": 3271555,\n    \"avg_ts\": 54.508236,\n    \"stddev_ts\": 0.018972,\n    \"samples_ns\": [ 9389304878, 9394840842, 9395084121 ],\n    \"samples_ts\": [ 54.5301, 54.498, 54.4966 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:45:13Z\",\n    \"avg_ns\": 11666979448,\n    \"stddev_ns\": 2067549,\n    \"avg_ts\": 10.971135,\n    \"stddev_ts\": 0.001944,\n    \"samples_ns\": [ 11667632273, 11668641785, 11664664286 ],\n    \"samples_ts\": [ 10.9705, 10.9696, 10.9733 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:44:35Z",
+          "avg_ns": 9393076613,
+          "stddev_ns": 3271555,
+          "avg_ts": 54.508236,
+          "stddev_ts": 0.018972,
+          "samples_ns": [
+            9389304878,
+            9394840842,
+            9395084121
+          ],
+          "samples_ts": [
+            54.5301,
+            54.498,
+            54.4966
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:45:13Z",
+          "avg_ns": 11666979448,
+          "stddev_ns": 2067549,
+          "avg_ts": 10.971135,
+          "stddev_ts": 0.001944,
+          "samples_ns": [
+            11667632273,
+            11668641785,
+            11664664286
+          ],
+          "samples_ts": [
+            10.9705,
+            10.9696,
+            10.9733
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 658
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:48:48.144243+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:45:49Z\",\n    \"avg_ns\": 9396676832,\n    \"stddev_ns\": 3469410,\n    \"avg_ts\": 54.487353,\n    \"stddev_ts\": 0.020102,\n    \"samples_ns\": [ 9400055231, 9396847159, 9393128108 ],\n    \"samples_ts\": [ 54.4678, 54.4864, 54.5079 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:46:26Z\",\n    \"avg_ns\": 47034020032,\n    \"stddev_ns\": 3375722,\n    \"avg_ts\": 10.885738,\n    \"stddev_ts\": 0.000781,\n    \"samples_ns\": [ 47037884304, 47031645118, 47032530674 ],\n    \"samples_ts\": [ 10.8848, 10.8863, 10.8861 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:45:49Z",
+          "avg_ns": 9396676832,
+          "stddev_ns": 3469410,
+          "avg_ts": 54.487353,
+          "stddev_ts": 0.020102,
+          "samples_ns": [
+            9400055231,
+            9396847159,
+            9393128108
+          ],
+          "samples_ts": [
+            54.4678,
+            54.4864,
+            54.5079
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:46:26Z",
+          "avg_ns": 47034020032,
+          "stddev_ns": 3375722,
+          "avg_ts": 10.885738,
+          "stddev_ts": 0.000781,
+          "samples_ns": [
+            47037884304,
+            47031645118,
+            47032530674
+          ],
+          "samples_ts": [
+            10.8848,
+            10.8863,
+            10.8861
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 659
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:49:33.322692+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:48:49Z\",\n    \"avg_ns\": 2252956733,\n    \"stddev_ns\": 375580,\n    \"avg_ts\": 56.814230,\n    \"stddev_ts\": 0.009470,\n    \"samples_ns\": [ 2252708517, 2253388822, 2252772860 ],\n    \"samples_ts\": [ 56.8205, 56.8033, 56.8189 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:48:58Z\",\n    \"avg_ns\": 11701107920,\n    \"stddev_ns\": 3094105,\n    \"avg_ts\": 10.939136,\n    \"stddev_ts\": 0.002893,\n    \"samples_ns\": [ 11697924307, 11704103987, 11701295466 ],\n    \"samples_ts\": [ 10.9421, 10.9363, 10.939 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:48:49Z",
+          "avg_ns": 2252956733,
+          "stddev_ns": 375580,
+          "avg_ts": 56.81423,
+          "stddev_ts": 0.00947,
+          "samples_ns": [
+            2252708517,
+            2253388822,
+            2252772860
+          ],
+          "samples_ts": [
+            56.8205,
+            56.8033,
+            56.8189
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:48:58Z",
+          "avg_ns": 11701107920,
+          "stddev_ns": 3094105,
+          "avg_ts": 10.939136,
+          "stddev_ts": 0.002893,
+          "samples_ns": [
+            11697924307,
+            11704103987,
+            11701295466
+          ],
+          "samples_ts": [
+            10.9421,
+            10.9363,
+            10.939
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 660
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:52:04.432601+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:49:34Z\",\n    \"avg_ns\": 2254717366,\n    \"stddev_ns\": 1614972,\n    \"avg_ts\": 56.769884,\n    \"stddev_ts\": 0.040675,\n    \"samples_ns\": [ 2255994483, 2255255610, 2252902005 ],\n    \"samples_ts\": [ 56.7377, 56.7563, 56.8156 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:49:43Z\",\n    \"avg_ns\": 47008115715,\n    \"stddev_ns\": 9629678,\n    \"avg_ts\": 10.891737,\n    \"stddev_ts\": 0.002231,\n    \"samples_ns\": [ 47006704621, 47018373085, 46999269439 ],\n    \"samples_ts\": [ 10.8921, 10.8894, 10.8938 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:49:34Z",
+          "avg_ns": 2254717366,
+          "stddev_ns": 1614972,
+          "avg_ts": 56.769884,
+          "stddev_ts": 0.040675,
+          "samples_ns": [
+            2255994483,
+            2255255610,
+            2252902005
+          ],
+          "samples_ts": [
+            56.7377,
+            56.7563,
+            56.8156
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:49:43Z",
+          "avg_ns": 47008115715,
+          "stddev_ns": 9629678,
+          "avg_ts": 10.891737,
+          "stddev_ts": 0.002231,
+          "samples_ns": [
+            47006704621,
+            47018373085,
+            46999269439
+          ],
+          "samples_ts": [
+            10.8921,
+            10.8894,
+            10.8938
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 661
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:53:16.893350+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:52:05Z\",\n    \"avg_ns\": 9076239978,\n    \"stddev_ns\": 3130119,\n    \"avg_ts\": 56.411029,\n    \"stddev_ts\": 0.019433,\n    \"samples_ns\": [ 9079807763, 9074938458, 9073973715 ],\n    \"samples_ts\": [ 56.3889, 56.4191, 56.4251 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:52:41Z\",\n    \"avg_ns\": 11696711631,\n    \"stddev_ns\": 1463703,\n    \"avg_ts\": 10.943247,\n    \"stddev_ts\": 0.001362,\n    \"samples_ns\": [ 11698392516, 11695873314, 11695869065 ],\n    \"samples_ts\": [ 10.9417, 10.944, 10.944 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:52:05Z",
+          "avg_ns": 9076239978,
+          "stddev_ns": 3130119,
+          "avg_ts": 56.411029,
+          "stddev_ts": 0.019433,
+          "samples_ns": [
+            9079807763,
+            9074938458,
+            9073973715
+          ],
+          "samples_ts": [
+            56.3889,
+            56.4191,
+            56.4251
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:52:41Z",
+          "avg_ns": 11696711631,
+          "stddev_ns": 1463703,
+          "avg_ts": 10.943247,
+          "stddev_ts": 0.001362,
+          "samples_ns": [
+            11698392516,
+            11695873314,
+            11695869065
+          ],
+          "samples_ts": [
+            10.9417,
+            10.944,
+            10.944
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 662
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:56:16.066986+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:53:17Z\",\n    \"avg_ns\": 9060441913,\n    \"stddev_ns\": 1558750,\n    \"avg_ts\": 56.509386,\n    \"stddev_ts\": 0.009686,\n    \"samples_ns\": [ 9060868345, 9058720322, 9061737074 ],\n    \"samples_ts\": [ 56.5067, 56.5201, 56.5013 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:53:54Z\",\n    \"avg_ns\": 47281856686,\n    \"stddev_ns\": 5440296,\n    \"avg_ts\": 10.828678,\n    \"stddev_ts\": 0.001244,\n    \"samples_ns\": [ 47281689582, 47276510569, 47287369909 ],\n    \"samples_ts\": [ 10.8287, 10.8299, 10.8274 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:53:17Z",
+          "avg_ns": 9060441913,
+          "stddev_ns": 1558750,
+          "avg_ts": 56.509386,
+          "stddev_ts": 0.009686,
+          "samples_ns": [
+            9060868345,
+            9058720322,
+            9061737074
+          ],
+          "samples_ts": [
+            56.5067,
+            56.5201,
+            56.5013
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:53:54Z",
+          "avg_ns": 47281856686,
+          "stddev_ns": 5440296,
+          "avg_ts": 10.828678,
+          "stddev_ts": 0.001244,
+          "samples_ns": [
+            47281689582,
+            47276510569,
+            47287369909
+          ],
+          "samples_ts": [
+            10.8287,
+            10.8299,
+            10.8274
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 663
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:57:01.145750+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:56:16Z\",\n    \"avg_ns\": 2252352584,\n    \"stddev_ns\": 348786,\n    \"avg_ts\": 56.829469,\n    \"stddev_ts\": 0.008719,\n    \"samples_ns\": [ 2251983190, 2252406665, 2252667898 ],\n    \"samples_ts\": [ 56.8388, 56.8281, 56.8215 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:56:25Z\",\n    \"avg_ns\": 11666410130,\n    \"stddev_ns\": 3483842,\n    \"avg_ts\": 10.971670,\n    \"stddev_ts\": 0.003277,\n    \"samples_ns\": [ 11669728933, 11666719566, 11662781891 ],\n    \"samples_ts\": [ 10.9685, 10.9714, 10.9751 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:56:16Z",
+          "avg_ns": 2252352584,
+          "stddev_ns": 348786,
+          "avg_ts": 56.829469,
+          "stddev_ts": 0.008719,
+          "samples_ns": [
+            2251983190,
+            2252406665,
+            2252667898
+          ],
+          "samples_ts": [
+            56.8388,
+            56.8281,
+            56.8215
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:56:25Z",
+          "avg_ns": 11666410130,
+          "stddev_ns": 3483842,
+          "avg_ts": 10.97167,
+          "stddev_ts": 0.003277,
+          "samples_ns": [
+            11669728933,
+            11666719566,
+            11662781891
+          ],
+          "samples_ts": [
+            10.9685,
+            10.9714,
+            10.9751
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 664
+    },
+    {
+      "timestamp_utc": "2025-12-09T09:59:32.595965+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:57:02Z\",\n    \"avg_ns\": 2253780359,\n    \"stddev_ns\": 823164,\n    \"avg_ts\": 56.793472,\n    \"stddev_ts\": 0.020746,\n    \"samples_ns\": [ 2254023130, 2252863111, 2254454836 ],\n    \"samples_ts\": [ 56.7873, 56.8166, 56.7765 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:57:11Z\",\n    \"avg_ns\": 47122209373,\n    \"stddev_ns\": 93928951,\n    \"avg_ts\": 10.865394,\n    \"stddev_ts\": 0.021644,\n    \"samples_ns\": [ 47041257738, 47100173011, 47225197370 ],\n    \"samples_ts\": [ 10.8841, 10.8704, 10.8417 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:57:02Z",
+          "avg_ns": 2253780359,
+          "stddev_ns": 823164,
+          "avg_ts": 56.793472,
+          "stddev_ts": 0.020746,
+          "samples_ns": [
+            2254023130,
+            2252863111,
+            2254454836
+          ],
+          "samples_ts": [
+            56.7873,
+            56.8166,
+            56.7765
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:57:11Z",
+          "avg_ns": 47122209373,
+          "stddev_ns": 93928951,
+          "avg_ts": 10.865394,
+          "stddev_ts": 0.021644,
+          "samples_ns": [
+            47041257738,
+            47100173011,
+            47225197370
+          ],
+          "samples_ts": [
+            10.8841,
+            10.8704,
+            10.8417
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 665
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:00:44.940066+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T09:59:33Z\",\n    \"avg_ns\": 9051553549,\n    \"stddev_ns\": 1872225,\n    \"avg_ts\": 56.564877,\n    \"stddev_ts\": 0.011683,\n    \"samples_ns\": [ 9053700736, 9050675833, 9050284079 ],\n    \"samples_ts\": [ 56.5515, 56.5704, 56.5728 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:00:09Z\",\n    \"avg_ns\": 11682671668,\n    \"stddev_ns\": 1634528,\n    \"avg_ts\": 10.956398,\n    \"stddev_ts\": 0.001530,\n    \"samples_ns\": [ 11683708232, 11683515059, 11680791714 ],\n    \"samples_ts\": [ 10.9554, 10.9556, 10.9582 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T09:59:33Z",
+          "avg_ns": 9051553549,
+          "stddev_ns": 1872225,
+          "avg_ts": 56.564877,
+          "stddev_ts": 0.011683,
+          "samples_ns": [
+            9053700736,
+            9050675833,
+            9050284079
+          ],
+          "samples_ts": [
+            56.5515,
+            56.5704,
+            56.5728
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:00:09Z",
+          "avg_ns": 11682671668,
+          "stddev_ns": 1634528,
+          "avg_ts": 10.956398,
+          "stddev_ts": 0.00153,
+          "samples_ns": [
+            11683708232,
+            11683515059,
+            11680791714
+          ],
+          "samples_ts": [
+            10.9554,
+            10.9556,
+            10.9582
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 666
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:03:43.454801+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:00:45Z\",\n    \"avg_ns\": 9043234162,\n    \"stddev_ns\": 1683786,\n    \"avg_ts\": 56.616914,\n    \"stddev_ts\": 0.010524,\n    \"samples_ns\": [ 9042437845, 9045165456, 9042099186 ],\n    \"samples_ts\": [ 56.6219, 56.6048, 56.624 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:01:22Z\",\n    \"avg_ns\": 47088553426,\n    \"stddev_ns\": 6893596,\n    \"avg_ts\": 10.873131,\n    \"stddev_ts\": 0.001592,\n    \"samples_ns\": [ 47081786439, 47088306825, 47095567014 ],\n    \"samples_ts\": [ 10.8747, 10.8732, 10.8715 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:00:45Z",
+          "avg_ns": 9043234162,
+          "stddev_ns": 1683786,
+          "avg_ts": 56.616914,
+          "stddev_ts": 0.010524,
+          "samples_ns": [
+            9042437845,
+            9045165456,
+            9042099186
+          ],
+          "samples_ts": [
+            56.6219,
+            56.6048,
+            56.624
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:01:22Z",
+          "avg_ns": 47088553426,
+          "stddev_ns": 6893596,
+          "avg_ts": 10.873131,
+          "stddev_ts": 0.001592,
+          "samples_ns": [
+            47081786439,
+            47088306825,
+            47095567014
+          ],
+          "samples_ts": [
+            10.8747,
+            10.8732,
+            10.8715
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 667
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:04:28.676826+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:03:44Z\",\n    \"avg_ns\": 2259305583,\n    \"stddev_ns\": 599743,\n    \"avg_ts\": 56.654579,\n    \"stddev_ts\": 0.014992,\n    \"samples_ns\": [ 2258708125, 2259304787, 2259903838 ],\n    \"samples_ts\": [ 56.6696, 56.6546, 56.6396 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:03:53Z\",\n    \"avg_ns\": 11705143787,\n    \"stddev_ns\": 4780239,\n    \"avg_ts\": 10.935365,\n    \"stddev_ts\": 0.004464,\n    \"samples_ns\": [ 11710602436, 11703115430, 11701713496 ],\n    \"samples_ts\": [ 10.9303, 10.9373, 10.9386 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:03:44Z",
+          "avg_ns": 2259305583,
+          "stddev_ns": 599743,
+          "avg_ts": 56.654579,
+          "stddev_ts": 0.014992,
+          "samples_ns": [
+            2258708125,
+            2259304787,
+            2259903838
+          ],
+          "samples_ts": [
+            56.6696,
+            56.6546,
+            56.6396
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:03:53Z",
+          "avg_ns": 11705143787,
+          "stddev_ns": 4780239,
+          "avg_ts": 10.935365,
+          "stddev_ts": 0.004464,
+          "samples_ns": [
+            11710602436,
+            11703115430,
+            11701713496
+          ],
+          "samples_ts": [
+            10.9303,
+            10.9373,
+            10.9386
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 668
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:07:00.274656+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:04:29Z\",\n    \"avg_ns\": 2253455688,\n    \"stddev_ns\": 191690,\n    \"avg_ts\": 56.801650,\n    \"stddev_ts\": 0.004832,\n    \"samples_ns\": [ 2253256646, 2253639066, 2253471352 ],\n    \"samples_ts\": [ 56.8067, 56.797, 56.8013 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:04:38Z\",\n    \"avg_ns\": 47150248942,\n    \"stddev_ns\": 7650567,\n    \"avg_ts\": 10.858904,\n    \"stddev_ts\": 0.001762,\n    \"samples_ns\": [ 47154505807, 47154824278, 47141416741 ],\n    \"samples_ts\": [ 10.8579, 10.8578, 10.8609 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:04:29Z",
+          "avg_ns": 2253455688,
+          "stddev_ns": 191690,
+          "avg_ts": 56.80165,
+          "stddev_ts": 0.004832,
+          "samples_ns": [
+            2253256646,
+            2253639066,
+            2253471352
+          ],
+          "samples_ts": [
+            56.8067,
+            56.797,
+            56.8013
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:04:38Z",
+          "avg_ns": 47150248942,
+          "stddev_ns": 7650567,
+          "avg_ts": 10.858904,
+          "stddev_ts": 0.001762,
+          "samples_ns": [
+            47154505807,
+            47154824278,
+            47141416741
+          ],
+          "samples_ts": [
+            10.8579,
+            10.8578,
+            10.8609
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 669
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:08:14.071574+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:07:01Z\",\n    \"avg_ns\": 9393453277,\n    \"stddev_ns\": 608361,\n    \"avg_ts\": 54.506047,\n    \"stddev_ts\": 0.003530,\n    \"samples_ns\": [ 9392814713, 9393519022, 9394026096 ],\n    \"samples_ts\": [ 54.5098, 54.5057, 54.5027 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:07:38Z\",\n    \"avg_ns\": 11709244585,\n    \"stddev_ns\": 3061843,\n    \"avg_ts\": 10.931534,\n    \"stddev_ts\": 0.002857,\n    \"samples_ns\": [ 11712073100, 11709664123, 11705996533 ],\n    \"samples_ts\": [ 10.9289, 10.9311, 10.9346 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:07:01Z",
+          "avg_ns": 9393453277,
+          "stddev_ns": 608361,
+          "avg_ts": 54.506047,
+          "stddev_ts": 0.00353,
+          "samples_ns": [
+            9392814713,
+            9393519022,
+            9394026096
+          ],
+          "samples_ts": [
+            54.5098,
+            54.5057,
+            54.5027
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:07:38Z",
+          "avg_ns": 11709244585,
+          "stddev_ns": 3061843,
+          "avg_ts": 10.931534,
+          "stddev_ts": 0.002857,
+          "samples_ns": [
+            11712073100,
+            11709664123,
+            11705996533
+          ],
+          "samples_ts": [
+            10.9289,
+            10.9311,
+            10.9346
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 670
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:11:14.300591+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:08:14Z\",\n    \"avg_ns\": 9396905485,\n    \"stddev_ns\": 831699,\n    \"avg_ts\": 54.486022,\n    \"stddev_ts\": 0.004789,\n    \"samples_ns\": [ 9396193010, 9396712531, 9397810915 ],\n    \"samples_ts\": [ 54.4902, 54.4871, 54.4808 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:08:52Z\",\n    \"avg_ns\": 47183084222,\n    \"stddev_ns\": 10716996,\n    \"avg_ts\": 10.851347,\n    \"stddev_ts\": 0.002464,\n    \"samples_ns\": [ 47195271369, 47175143510, 47178837788 ],\n    \"samples_ts\": [ 10.8485, 10.8532, 10.8523 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:08:14Z",
+          "avg_ns": 9396905485,
+          "stddev_ns": 831699,
+          "avg_ts": 54.486022,
+          "stddev_ts": 0.004789,
+          "samples_ns": [
+            9396193010,
+            9396712531,
+            9397810915
+          ],
+          "samples_ts": [
+            54.4902,
+            54.4871,
+            54.4808
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:08:52Z",
+          "avg_ns": 47183084222,
+          "stddev_ns": 10716996,
+          "avg_ts": 10.851347,
+          "stddev_ts": 0.002464,
+          "samples_ns": [
+            47195271369,
+            47175143510,
+            47178837788
+          ],
+          "samples_ts": [
+            10.8485,
+            10.8532,
+            10.8523
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 671
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:11:59.529810+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:11:15Z\",\n    \"avg_ns\": 2255477709,\n    \"stddev_ns\": 997369,\n    \"avg_ts\": 56.750735,\n    \"stddev_ts\": 0.025089,\n    \"samples_ns\": [ 2255066810, 2256614887, 2254751430 ],\n    \"samples_ts\": [ 56.7611, 56.7221, 56.769 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:11:24Z\",\n    \"avg_ns\": 11706751973,\n    \"stddev_ns\": 3011172,\n    \"avg_ts\": 10.933862,\n    \"stddev_ts\": 0.002811,\n    \"samples_ns\": [ 11709802248, 11703785545, 11706668127 ],\n    \"samples_ts\": [ 10.931, 10.9366, 10.9339 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:11:15Z",
+          "avg_ns": 2255477709,
+          "stddev_ns": 997369,
+          "avg_ts": 56.750735,
+          "stddev_ts": 0.025089,
+          "samples_ns": [
+            2255066810,
+            2256614887,
+            2254751430
+          ],
+          "samples_ts": [
+            56.7611,
+            56.7221,
+            56.769
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:11:24Z",
+          "avg_ns": 11706751973,
+          "stddev_ns": 3011172,
+          "avg_ts": 10.933862,
+          "stddev_ts": 0.002811,
+          "samples_ns": [
+            11709802248,
+            11703785545,
+            11706668127
+          ],
+          "samples_ts": [
+            10.931,
+            10.9366,
+            10.9339
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 672
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:14:30.931875+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:12:00Z\",\n    \"avg_ns\": 2257966692,\n    \"stddev_ns\": 713738,\n    \"avg_ts\": 56.688174,\n    \"stddev_ts\": 0.017838,\n    \"samples_ns\": [ 2258720014, 2257308452, 2257871612 ],\n    \"samples_ts\": [ 56.6693, 56.7047, 56.6906 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:12:09Z\",\n    \"avg_ns\": 47099733641,\n    \"stddev_ns\": 6254628,\n    \"avg_ts\": 10.870550,\n    \"stddev_ts\": 0.001444,\n    \"samples_ns\": [ 47093220851, 47105693378, 47100286694 ],\n    \"samples_ts\": [ 10.8721, 10.8692, 10.8704 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:12:00Z",
+          "avg_ns": 2257966692,
+          "stddev_ns": 713738,
+          "avg_ts": 56.688174,
+          "stddev_ts": 0.017838,
+          "samples_ns": [
+            2258720014,
+            2257308452,
+            2257871612
+          ],
+          "samples_ts": [
+            56.6693,
+            56.7047,
+            56.6906
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:12:09Z",
+          "avg_ns": 47099733641,
+          "stddev_ns": 6254628,
+          "avg_ts": 10.87055,
+          "stddev_ts": 0.001444,
+          "samples_ns": [
+            47093220851,
+            47105693378,
+            47100286694
+          ],
+          "samples_ts": [
+            10.8721,
+            10.8692,
+            10.8704
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 673
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:15:43.492447+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:14:31Z\",\n    \"avg_ns\": 9069022903,\n    \"stddev_ns\": 2500708,\n    \"avg_ts\": 56.455919,\n    \"stddev_ts\": 0.015555,\n    \"samples_ns\": [ 9068708095, 9066696330, 9071664285 ],\n    \"samples_ts\": [ 56.4579, 56.4704, 56.4395 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:15:08Z\",\n    \"avg_ns\": 11736583359,\n    \"stddev_ns\": 2729237,\n    \"avg_ts\": 10.906070,\n    \"stddev_ts\": 0.002536,\n    \"samples_ns\": [ 11739699161, 11735434815, 11734616101 ],\n    \"samples_ts\": [ 10.9032, 10.9071, 10.9079 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:14:31Z",
+          "avg_ns": 9069022903,
+          "stddev_ns": 2500708,
+          "avg_ts": 56.455919,
+          "stddev_ts": 0.015555,
+          "samples_ns": [
+            9068708095,
+            9066696330,
+            9071664285
+          ],
+          "samples_ts": [
+            56.4579,
+            56.4704,
+            56.4395
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:15:08Z",
+          "avg_ns": 11736583359,
+          "stddev_ns": 2729237,
+          "avg_ts": 10.90607,
+          "stddev_ts": 0.002536,
+          "samples_ns": [
+            11739699161,
+            11735434815,
+            11734616101
+          ],
+          "samples_ts": [
+            10.9032,
+            10.9071,
+            10.9079
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 674
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:18:42.088983+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:15:44Z\",\n    \"avg_ns\": 9071227361,\n    \"stddev_ns\": 3650991,\n    \"avg_ts\": 56.442203,\n    \"stddev_ts\": 0.022704,\n    \"samples_ns\": [ 9069503003, 9075419797, 9068759284 ],\n    \"samples_ts\": [ 56.4529, 56.4161, 56.4576 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:16:20Z\",\n    \"avg_ns\": 47071893925,\n    \"stddev_ns\": 11940619,\n    \"avg_ts\": 10.876979,\n    \"stddev_ts\": 0.002759,\n    \"samples_ns\": [ 47083063077, 47073310487, 47059308211 ],\n    \"samples_ts\": [ 10.8744, 10.8767, 10.8799 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:15:44Z",
+          "avg_ns": 9071227361,
+          "stddev_ns": 3650991,
+          "avg_ts": 56.442203,
+          "stddev_ts": 0.022704,
+          "samples_ns": [
+            9069503003,
+            9075419797,
+            9068759284
+          ],
+          "samples_ts": [
+            56.4529,
+            56.4161,
+            56.4576
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:16:20Z",
+          "avg_ns": 47071893925,
+          "stddev_ns": 11940619,
+          "avg_ts": 10.876979,
+          "stddev_ts": 0.002759,
+          "samples_ns": [
+            47083063077,
+            47073310487,
+            47059308211
+          ],
+          "samples_ts": [
+            10.8744,
+            10.8767,
+            10.8799
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 675
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:19:27.258918+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:18:43Z\",\n    \"avg_ns\": 2256561844,\n    \"stddev_ns\": 236111,\n    \"avg_ts\": 56.723462,\n    \"stddev_ts\": 0.005935,\n    \"samples_ns\": [ 2256398854, 2256832612, 2256454066 ],\n    \"samples_ts\": [ 56.7276, 56.7167, 56.7262 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:18:52Z\",\n    \"avg_ns\": 11675334989,\n    \"stddev_ns\": 9386197,\n    \"avg_ts\": 10.963288,\n    \"stddev_ts\": 0.008810,\n    \"samples_ns\": [ 11668838178, 11671070453, 11686096336 ],\n    \"samples_ts\": [ 10.9694, 10.9673, 10.9532 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:18:43Z",
+          "avg_ns": 2256561844,
+          "stddev_ns": 236111,
+          "avg_ts": 56.723462,
+          "stddev_ts": 0.005935,
+          "samples_ns": [
+            2256398854,
+            2256832612,
+            2256454066
+          ],
+          "samples_ts": [
+            56.7276,
+            56.7167,
+            56.7262
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:18:52Z",
+          "avg_ns": 11675334989,
+          "stddev_ns": 9386197,
+          "avg_ts": 10.963288,
+          "stddev_ts": 0.00881,
+          "samples_ns": [
+            11668838178,
+            11671070453,
+            11686096336
+          ],
+          "samples_ts": [
+            10.9694,
+            10.9673,
+            10.9532
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 676
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:21:58.654922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:19:28Z\",\n    \"avg_ns\": 2258043036,\n    \"stddev_ns\": 111320,\n    \"avg_ts\": 56.686254,\n    \"stddev_ts\": 0.002527,\n    \"samples_ns\": [ 2258037473, 2257945264, 2258146372 ],\n    \"samples_ts\": [ 56.6864, 56.6887, 56.6837 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:19:37Z\",\n    \"avg_ns\": 47100176719,\n    \"stddev_ns\": 5070058,\n    \"avg_ts\": 10.870448,\n    \"stddev_ts\": 0.001168,\n    \"samples_ns\": [ 47094513859, 47101759032, 47104257268 ],\n    \"samples_ts\": [ 10.8718, 10.8701, 10.8695 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:19:28Z",
+          "avg_ns": 2258043036,
+          "stddev_ns": 111320,
+          "avg_ts": 56.686254,
+          "stddev_ts": 0.002527,
+          "samples_ns": [
+            2258037473,
+            2257945264,
+            2258146372
+          ],
+          "samples_ts": [
+            56.6864,
+            56.6887,
+            56.6837
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:19:37Z",
+          "avg_ns": 47100176719,
+          "stddev_ns": 5070058,
+          "avg_ts": 10.870448,
+          "stddev_ts": 0.001168,
+          "samples_ns": [
+            47094513859,
+            47101759032,
+            47104257268
+          ],
+          "samples_ts": [
+            10.8718,
+            10.8701,
+            10.8695
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 677
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:23:11.012489+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:21:59Z\",\n    \"avg_ns\": 9066048241,\n    \"stddev_ns\": 909326,\n    \"avg_ts\": 56.474441,\n    \"stddev_ts\": 0.005665,\n    \"samples_ns\": [ 9066294336, 9066809192, 9065041195 ],\n    \"samples_ts\": [ 56.4729, 56.4697, 56.4807 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:22:35Z\",\n    \"avg_ns\": 11671957644,\n    \"stddev_ns\": 2184575,\n    \"avg_ts\": 10.966455,\n    \"stddev_ts\": 0.002047,\n    \"samples_ns\": [ 11670285611, 11671165087, 11674422236 ],\n    \"samples_ts\": [ 10.968, 10.9672, 10.9641 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:21:59Z",
+          "avg_ns": 9066048241,
+          "stddev_ns": 909326,
+          "avg_ts": 56.474441,
+          "stddev_ts": 0.005665,
+          "samples_ns": [
+            9066294336,
+            9066809192,
+            9065041195
+          ],
+          "samples_ts": [
+            56.4729,
+            56.4697,
+            56.4807
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:22:35Z",
+          "avg_ns": 11671957644,
+          "stddev_ns": 2184575,
+          "avg_ts": 10.966455,
+          "stddev_ts": 0.002047,
+          "samples_ns": [
+            11670285611,
+            11671165087,
+            11674422236
+          ],
+          "samples_ts": [
+            10.968,
+            10.9672,
+            10.9641
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 678
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:26:09.814049+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:23:11Z\",\n    \"avg_ns\": 9055595516,\n    \"stddev_ns\": 3855434,\n    \"avg_ts\": 56.539635,\n    \"stddev_ts\": 0.024066,\n    \"samples_ns\": [ 9060029748, 9053035523, 9053721277 ],\n    \"samples_ts\": [ 56.512, 56.5556, 56.5513 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:23:48Z\",\n    \"avg_ns\": 47155342709,\n    \"stddev_ns\": 15839014,\n    \"avg_ts\": 10.857731,\n    \"stddev_ts\": 0.003646,\n    \"samples_ns\": [ 47147510520, 47173568685, 47144948924 ],\n    \"samples_ts\": [ 10.8595, 10.8535, 10.8601 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:23:11Z",
+          "avg_ns": 9055595516,
+          "stddev_ns": 3855434,
+          "avg_ts": 56.539635,
+          "stddev_ts": 0.024066,
+          "samples_ns": [
+            9060029748,
+            9053035523,
+            9053721277
+          ],
+          "samples_ts": [
+            56.512,
+            56.5556,
+            56.5513
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:23:48Z",
+          "avg_ns": 47155342709,
+          "stddev_ns": 15839014,
+          "avg_ts": 10.857731,
+          "stddev_ts": 0.003646,
+          "samples_ns": [
+            47147510520,
+            47173568685,
+            47144948924
+          ],
+          "samples_ts": [
+            10.8595,
+            10.8535,
+            10.8601
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 679
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:26:54.968440+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:26:10Z\",\n    \"avg_ns\": 2256053430,\n    \"stddev_ns\": 854329,\n    \"avg_ts\": 56.736250,\n    \"stddev_ts\": 0.021487,\n    \"samples_ns\": [ 2255134726, 2256201549, 2256824015 ],\n    \"samples_ts\": [ 56.7594, 56.7325, 56.7169 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:26:19Z\",\n    \"avg_ns\": 11685112811,\n    \"stddev_ns\": 3368926,\n    \"avg_ts\": 10.954110,\n    \"stddev_ts\": 0.003156,\n    \"samples_ns\": [ 11682564541, 11683843775, 11688930118 ],\n    \"samples_ts\": [ 10.9565, 10.9553, 10.9505 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:26:10Z",
+          "avg_ns": 2256053430,
+          "stddev_ns": 854329,
+          "avg_ts": 56.73625,
+          "stddev_ts": 0.021487,
+          "samples_ns": [
+            2255134726,
+            2256201549,
+            2256824015
+          ],
+          "samples_ts": [
+            56.7594,
+            56.7325,
+            56.7169
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:26:19Z",
+          "avg_ns": 11685112811,
+          "stddev_ns": 3368926,
+          "avg_ts": 10.95411,
+          "stddev_ts": 0.003156,
+          "samples_ns": [
+            11682564541,
+            11683843775,
+            11688930118
+          ],
+          "samples_ts": [
+            10.9565,
+            10.9553,
+            10.9505
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 680
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:29:26.903823+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:26:55Z\",\n    \"avg_ns\": 2259404315,\n    \"stddev_ns\": 864370,\n    \"avg_ts\": 56.652106,\n    \"stddev_ts\": 0.021671,\n    \"samples_ns\": [ 2258609161, 2259279462, 2260324322 ],\n    \"samples_ts\": [ 56.672, 56.6552, 56.629 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:27:04Z\",\n    \"avg_ns\": 47271069062,\n    \"stddev_ns\": 7471973,\n    \"avg_ts\": 10.831149,\n    \"stddev_ts\": 0.001712,\n    \"samples_ns\": [ 47279655048, 47266040523, 47267511615 ],\n    \"samples_ts\": [ 10.8292, 10.8323, 10.832 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:26:55Z",
+          "avg_ns": 2259404315,
+          "stddev_ns": 864370,
+          "avg_ts": 56.652106,
+          "stddev_ts": 0.021671,
+          "samples_ns": [
+            2258609161,
+            2259279462,
+            2260324322
+          ],
+          "samples_ts": [
+            56.672,
+            56.6552,
+            56.629
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:27:04Z",
+          "avg_ns": 47271069062,
+          "stddev_ns": 7471973,
+          "avg_ts": 10.831149,
+          "stddev_ts": 0.001712,
+          "samples_ns": [
+            47279655048,
+            47266040523,
+            47267511615
+          ],
+          "samples_ts": [
+            10.8292,
+            10.8323,
+            10.832
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 681
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:30:40.614745+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:29:27Z\",\n    \"avg_ns\": 9388826656,\n    \"stddev_ns\": 2763186,\n    \"avg_ts\": 54.532909,\n    \"stddev_ts\": 0.016047,\n    \"samples_ns\": [ 9387708540, 9386797749, 9391973679 ],\n    \"samples_ts\": [ 54.5394, 54.5447, 54.5146 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:30:05Z\",\n    \"avg_ns\": 11688459522,\n    \"stddev_ns\": 1425193,\n    \"avg_ts\": 10.950973,\n    \"stddev_ts\": 0.001331,\n    \"samples_ns\": [ 11689590217, 11686864304, 11688924046 ],\n    \"samples_ts\": [ 10.9499, 10.9525, 10.9505 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:29:27Z",
+          "avg_ns": 9388826656,
+          "stddev_ns": 2763186,
+          "avg_ts": 54.532909,
+          "stddev_ts": 0.016047,
+          "samples_ns": [
+            9387708540,
+            9386797749,
+            9391973679
+          ],
+          "samples_ts": [
+            54.5394,
+            54.5447,
+            54.5146
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:30:05Z",
+          "avg_ns": 11688459522,
+          "stddev_ns": 1425193,
+          "avg_ts": 10.950973,
+          "stddev_ts": 0.001331,
+          "samples_ns": [
+            11689590217,
+            11686864304,
+            11688924046
+          ],
+          "samples_ts": [
+            10.9499,
+            10.9525,
+            10.9505
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 682
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:33:40.938309+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:30:41Z\",\n    \"avg_ns\": 9363922191,\n    \"stddev_ns\": 2835847,\n    \"avg_ts\": 54.677946,\n    \"stddev_ts\": 0.016556,\n    \"samples_ns\": [ 9362107788, 9367190110, 9362468675 ],\n    \"samples_ts\": [ 54.6885, 54.6589, 54.6864 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:31:19Z\",\n    \"avg_ns\": 47245027683,\n    \"stddev_ns\": 5803251,\n    \"avg_ts\": 10.837119,\n    \"stddev_ts\": 0.001330,\n    \"samples_ns\": [ 47249448535, 47238461510, 47247173005 ],\n    \"samples_ts\": [ 10.8361, 10.8386, 10.8366 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:30:41Z",
+          "avg_ns": 9363922191,
+          "stddev_ns": 2835847,
+          "avg_ts": 54.677946,
+          "stddev_ts": 0.016556,
+          "samples_ns": [
+            9362107788,
+            9367190110,
+            9362468675
+          ],
+          "samples_ts": [
+            54.6885,
+            54.6589,
+            54.6864
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:31:19Z",
+          "avg_ns": 47245027683,
+          "stddev_ns": 5803251,
+          "avg_ts": 10.837119,
+          "stddev_ts": 0.00133,
+          "samples_ns": [
+            47249448535,
+            47238461510,
+            47247173005
+          ],
+          "samples_ts": [
+            10.8361,
+            10.8386,
+            10.8366
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 683
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:34:27.152968+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:33:41Z\",\n    \"avg_ns\": 1751620116,\n    \"stddev_ns\": 254656,\n    \"avg_ts\": 73.075207,\n    \"stddev_ts\": 0.010480,\n    \"samples_ns\": [ 1751797656, 1751332706, 1751729987 ],\n    \"samples_ts\": [ 73.0678, 73.0872, 73.0706 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:33:48Z\",\n    \"avg_ns\": 12708398126,\n    \"stddev_ns\": 14796070,\n    \"avg_ts\": 10.072089,\n    \"stddev_ts\": 0.011732,\n    \"samples_ns\": [ 12720314411, 12713042465, 12691837503 ],\n    \"samples_ts\": [ 10.0626, 10.0684, 10.0852 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:33:41Z",
+          "avg_ns": 1751620116,
+          "stddev_ns": 254656,
+          "avg_ts": 73.075207,
+          "stddev_ts": 0.01048,
+          "samples_ns": [
+            1751797656,
+            1751332706,
+            1751729987
+          ],
+          "samples_ts": [
+            73.0678,
+            73.0872,
+            73.0706
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:33:48Z",
+          "avg_ns": 12708398126,
+          "stddev_ns": 14796070,
+          "avg_ts": 10.072089,
+          "stddev_ts": 0.011732,
+          "samples_ns": [
+            12720314411,
+            12713042465,
+            12691837503
+          ],
+          "samples_ts": [
+            10.0626,
+            10.0684,
+            10.0852
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 684
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:37:08.904608+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:34:28Z\",\n    \"avg_ns\": 1756886700,\n    \"stddev_ns\": 1020893,\n    \"avg_ts\": 72.856166,\n    \"stddev_ts\": 0.042278,\n    \"samples_ns\": [ 1755710437, 1757443027, 1757506638 ],\n    \"samples_ts\": [ 72.905, 72.8331, 72.8305 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:34:35Z\",\n    \"avg_ns\": 51205246048,\n    \"stddev_ns\": 26755146,\n    \"avg_ts\": 9.998977,\n    \"stddev_ts\": 0.005223,\n    \"samples_ns\": [ 51236010500, 51192292163, 51187435483 ],\n    \"samples_ts\": [ 9.99297, 10.0015, 10.0025 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:34:28Z",
+          "avg_ns": 1756886700,
+          "stddev_ns": 1020893,
+          "avg_ts": 72.856166,
+          "stddev_ts": 0.042278,
+          "samples_ns": [
+            1755710437,
+            1757443027,
+            1757506638
+          ],
+          "samples_ts": [
+            72.905,
+            72.8331,
+            72.8305
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:34:35Z",
+          "avg_ns": 51205246048,
+          "stddev_ns": 26755146,
+          "avg_ts": 9.998977,
+          "stddev_ts": 0.005223,
+          "samples_ns": [
+            51236010500,
+            51192292163,
+            51187435483
+          ],
+          "samples_ts": [
+            9.99297,
+            10.0015,
+            10.0025
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 685
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:38:17.515995+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:37:09Z\",\n    \"avg_ns\": 7033433834,\n    \"stddev_ns\": 2545278,\n    \"avg_ts\": 72.795175,\n    \"stddev_ts\": 0.026328,\n    \"samples_ns\": [ 7036036209, 7033312539, 7030952755 ],\n    \"samples_ts\": [ 72.7682, 72.7964, 72.8209 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:37:37Z\",\n    \"avg_ns\": 13135315935,\n    \"stddev_ns\": 53998134,\n    \"avg_ts\": 9.744832,\n    \"stddev_ts\": 0.040132,\n    \"samples_ns\": [ 13074719556, 13178336456, 13152891793 ],\n    \"samples_ts\": [ 9.78988, 9.71291, 9.7317 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:37:09Z",
+          "avg_ns": 7033433834,
+          "stddev_ns": 2545278,
+          "avg_ts": 72.795175,
+          "stddev_ts": 0.026328,
+          "samples_ns": [
+            7036036209,
+            7033312539,
+            7030952755
+          ],
+          "samples_ts": [
+            72.7682,
+            72.7964,
+            72.8209
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:37:37Z",
+          "avg_ns": 13135315935,
+          "stddev_ns": 53998134,
+          "avg_ts": 9.744832,
+          "stddev_ts": 0.040132,
+          "samples_ns": [
+            13074719556,
+            13178336456,
+            13152891793
+          ],
+          "samples_ts": [
+            9.78988,
+            9.71291,
+            9.7317
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 686
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:41:24.533948+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:38:18Z\",\n    \"avg_ns\": 7030323957,\n    \"stddev_ns\": 2337061,\n    \"avg_ts\": 72.827375,\n    \"stddev_ts\": 0.024213,\n    \"samples_ns\": [ 7027732307, 7030968345, 7032271219 ],\n    \"samples_ts\": [ 72.8542, 72.8207, 72.8072 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:38:46Z\",\n    \"avg_ns\": 52602123964,\n    \"stddev_ns\": 3199914841,\n    \"avg_ts\": 9.735857,\n    \"stddev_ts\": 0.188650,\n    \"samples_ns\": [ 53206241666, 53161670710, 51438459518 ],\n    \"samples_ts\": [ 9.62293, 9.631, 9.95364 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:38:18Z",
+          "avg_ns": 7030323957,
+          "stddev_ns": 2337061,
+          "avg_ts": 72.827375,
+          "stddev_ts": 0.024213,
+          "samples_ns": [
+            7027732307,
+            7030968345,
+            7032271219
+          ],
+          "samples_ts": [
+            72.8542,
+            72.8207,
+            72.8072
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:38:46Z",
+          "avg_ns": 52602123964,
+          "stddev_ns": 3199914841,
+          "avg_ts": 9.735857,
+          "stddev_ts": 0.18865,
+          "samples_ns": [
+            53206241666,
+            53161670710,
+            51438459518
+          ],
+          "samples_ts": [
+            9.62293,
+            9.631,
+            9.95364
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 687
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:42:10.924680+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:41:25Z\",\n    \"avg_ns\": 1754847989,\n    \"stddev_ns\": 4827319,\n    \"avg_ts\": 72.941158,\n    \"stddev_ts\": 0.200324,\n    \"samples_ns\": [ 1752113279, 1752009126, 1760421563 ],\n    \"samples_ts\": [ 73.0546, 73.059, 72.7099 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:41:32Z\",\n    \"avg_ns\": 12760567026,\n    \"stddev_ns\": 43714396,\n    \"avg_ts\": 10.030981,\n    \"stddev_ts\": 0.034316,\n    \"samples_ns\": [ 12724849839, 12747536341, 12809314899 ],\n    \"samples_ts\": [ 10.0591, 10.0412, 9.99273 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:41:25Z",
+          "avg_ns": 1754847989,
+          "stddev_ns": 4827319,
+          "avg_ts": 72.941158,
+          "stddev_ts": 0.200324,
+          "samples_ns": [
+            1752113279,
+            1752009126,
+            1760421563
+          ],
+          "samples_ts": [
+            73.0546,
+            73.059,
+            72.7099
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:41:32Z",
+          "avg_ns": 12760567026,
+          "stddev_ns": 43714396,
+          "avg_ts": 10.030981,
+          "stddev_ts": 0.034316,
+          "samples_ns": [
+            12724849839,
+            12747536341,
+            12809314899
+          ],
+          "samples_ts": [
+            10.0591,
+            10.0412,
+            9.99273
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 688
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:44:52.791783+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:42:11Z\",\n    \"avg_ns\": 1748579899,\n    \"stddev_ns\": 909356,\n    \"avg_ts\": 73.202273,\n    \"stddev_ts\": 0.037980,\n    \"samples_ns\": [ 1749597264, 1748288379, 1747854056 ],\n    \"samples_ts\": [ 73.1597, 73.2145, 73.2327 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:42:18Z\",\n    \"avg_ns\": 51255599698,\n    \"stddev_ns\": 190235125,\n    \"avg_ts\": 9.989244,\n    \"stddev_ts\": 0.036996,\n    \"samples_ns\": [ 51475092160, 51138327091, 51153379844 ],\n    \"samples_ts\": [ 9.94656, 10.0121, 10.0091 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:42:11Z",
+          "avg_ns": 1748579899,
+          "stddev_ns": 909356,
+          "avg_ts": 73.202273,
+          "stddev_ts": 0.03798,
+          "samples_ns": [
+            1749597264,
+            1748288379,
+            1747854056
+          ],
+          "samples_ts": [
+            73.1597,
+            73.2145,
+            73.2327
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:42:18Z",
+          "avg_ns": 51255599698,
+          "stddev_ns": 190235125,
+          "avg_ts": 9.989244,
+          "stddev_ts": 0.036996,
+          "samples_ns": [
+            51475092160,
+            51138327091,
+            51153379844
+          ],
+          "samples_ts": [
+            9.94656,
+            10.0121,
+            10.0091
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 689
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:46:01.710341+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:44:53Z\",\n    \"avg_ns\": 7038567718,\n    \"stddev_ns\": 9966762,\n    \"avg_ts\": 72.742169,\n    \"stddev_ts\": 0.102957,\n    \"samples_ns\": [ 7036273328, 7029948225, 7049481601 ],\n    \"samples_ts\": [ 72.7658, 72.8313, 72.6295 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:45:21Z\",\n    \"avg_ns\": 13214337938,\n    \"stddev_ns\": 49247073,\n    \"avg_ts\": 9.686538,\n    \"stddev_ts\": 0.036164,\n    \"samples_ns\": [ 13158613494, 13252016001, 13232384320 ],\n    \"samples_ts\": [ 9.72747, 9.65891, 9.67324 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:44:53Z",
+          "avg_ns": 7038567718,
+          "stddev_ns": 9966762,
+          "avg_ts": 72.742169,
+          "stddev_ts": 0.102957,
+          "samples_ns": [
+            7036273328,
+            7029948225,
+            7049481601
+          ],
+          "samples_ts": [
+            72.7658,
+            72.8313,
+            72.6295
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:45:21Z",
+          "avg_ns": 13214337938,
+          "stddev_ns": 49247073,
+          "avg_ts": 9.686538,
+          "stddev_ts": 0.036164,
+          "samples_ns": [
+            13158613494,
+            13252016001,
+            13232384320
+          ],
+          "samples_ts": [
+            9.72747,
+            9.65891,
+            9.67324
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 690
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:49:10.287652+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:46:02Z\",\n    \"avg_ns\": 7039636044,\n    \"stddev_ns\": 6097203,\n    \"avg_ts\": 72.731069,\n    \"stddev_ts\": 0.062996,\n    \"samples_ns\": [ 7040205630, 7045427890, 7033274613 ],\n    \"samples_ts\": [ 72.7251, 72.6712, 72.7968 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:46:30Z\",\n    \"avg_ns\": 53104626552,\n    \"stddev_ns\": 25736823,\n    \"avg_ts\": 9.641346,\n    \"stddev_ts\": 0.004674,\n    \"samples_ns\": [ 53074913138, 53119818818, 53119147702 ],\n    \"samples_ts\": [ 9.64674, 9.63859, 9.63871 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:46:02Z",
+          "avg_ns": 7039636044,
+          "stddev_ns": 6097203,
+          "avg_ts": 72.731069,
+          "stddev_ts": 0.062996,
+          "samples_ns": [
+            7040205630,
+            7045427890,
+            7033274613
+          ],
+          "samples_ts": [
+            72.7251,
+            72.6712,
+            72.7968
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:46:30Z",
+          "avg_ns": 53104626552,
+          "stddev_ns": 25736823,
+          "avg_ts": 9.641346,
+          "stddev_ts": 0.004674,
+          "samples_ns": [
+            53074913138,
+            53119818818,
+            53119147702
+          ],
+          "samples_ts": [
+            9.64674,
+            9.63859,
+            9.63871
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 691
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:49:57.734058+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:49:11Z\",\n    \"avg_ns\": 1752490193,\n    \"stddev_ns\": 281712,\n    \"avg_ts\": 73.038926,\n    \"stddev_ts\": 0.011480,\n    \"samples_ns\": [ 1752176803, 1752693757, 1752600021 ],\n    \"samples_ts\": [ 73.052, 73.0304, 73.0343 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:49:18Z\",\n    \"avg_ns\": 13108761158,\n    \"stddev_ns\": 125679573,\n    \"avg_ts\": 9.765064,\n    \"stddev_ts\": 0.094141,\n    \"samples_ns\": [ 12963698950, 13177680373, 13184904153 ],\n    \"samples_ts\": [ 9.87373, 9.71339, 9.70807 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:49:11Z",
+          "avg_ns": 1752490193,
+          "stddev_ns": 281712,
+          "avg_ts": 73.038926,
+          "stddev_ts": 0.01148,
+          "samples_ns": [
+            1752176803,
+            1752693757,
+            1752600021
+          ],
+          "samples_ts": [
+            73.052,
+            73.0304,
+            73.0343
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:49:18Z",
+          "avg_ns": 13108761158,
+          "stddev_ns": 125679573,
+          "avg_ts": 9.765064,
+          "stddev_ts": 0.094141,
+          "samples_ns": [
+            12963698950,
+            13177680373,
+            13184904153
+          ],
+          "samples_ts": [
+            9.87373,
+            9.71339,
+            9.70807
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 692
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:52:44.853034+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:49:58Z\",\n    \"avg_ns\": 1754851739,\n    \"stddev_ns\": 1680807,\n    \"avg_ts\": 72.940679,\n    \"stddev_ts\": 0.069832,\n    \"samples_ns\": [ 1753554810, 1754249765, 1756750642 ],\n    \"samples_ts\": [ 72.9946, 72.9657, 72.8618 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:50:05Z\",\n    \"avg_ns\": 52994504537,\n    \"stddev_ns\": 244506680,\n    \"avg_ts\": 9.661516,\n    \"stddev_ts\": 0.044579,\n    \"samples_ns\": [ 52996034687, 53238242552, 52749236372 ],\n    \"samples_ts\": [ 9.6611, 9.61715, 9.7063 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:49:58Z",
+          "avg_ns": 1754851739,
+          "stddev_ns": 1680807,
+          "avg_ts": 72.940679,
+          "stddev_ts": 0.069832,
+          "samples_ns": [
+            1753554810,
+            1754249765,
+            1756750642
+          ],
+          "samples_ts": [
+            72.9946,
+            72.9657,
+            72.8618
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:50:05Z",
+          "avg_ns": 52994504537,
+          "stddev_ns": 244506680,
+          "avg_ts": 9.661516,
+          "stddev_ts": 0.044579,
+          "samples_ns": [
+            52996034687,
+            53238242552,
+            52749236372
+          ],
+          "samples_ts": [
+            9.6611,
+            9.61715,
+            9.7063
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 693
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:53:54.933837+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:52:45Z\",\n    \"avg_ns\": 7334849430,\n    \"stddev_ns\": 6547289,\n    \"avg_ts\": 69.803788,\n    \"stddev_ts\": 0.062341,\n    \"samples_ns\": [ 7338599410, 7338659529, 7327289351 ],\n    \"samples_ts\": [ 69.7681, 69.7675, 69.8758 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:53:15Z\",\n    \"avg_ns\": 13216971336,\n    \"stddev_ns\": 10766329,\n    \"avg_ts\": 9.684523,\n    \"stddev_ts\": 0.007888,\n    \"samples_ns\": [ 13206041150, 13227563386, 13217309474 ],\n    \"samples_ts\": [ 9.69253, 9.67676, 9.68427 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:52:45Z",
+          "avg_ns": 7334849430,
+          "stddev_ns": 6547289,
+          "avg_ts": 69.803788,
+          "stddev_ts": 0.062341,
+          "samples_ns": [
+            7338599410,
+            7338659529,
+            7327289351
+          ],
+          "samples_ts": [
+            69.7681,
+            69.7675,
+            69.8758
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:53:15Z",
+          "avg_ns": 13216971336,
+          "stddev_ns": 10766329,
+          "avg_ts": 9.684523,
+          "stddev_ts": 0.007888,
+          "samples_ns": [
+            13206041150,
+            13227563386,
+            13217309474
+          ],
+          "samples_ts": [
+            9.69253,
+            9.67676,
+            9.68427
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 694
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:57:04.951340+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:53:55Z\",\n    \"avg_ns\": 7342258968,\n    \"stddev_ns\": 1857917,\n    \"avg_ts\": 69.733310,\n    \"stddev_ts\": 0.017624,\n    \"samples_ns\": [ 7341209554, 7341165478, 7344401873 ],\n    \"samples_ts\": [ 69.7433, 69.7437, 69.713 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:54:25Z\",\n    \"avg_ns\": 53183314247,\n    \"stddev_ns\": 93013700,\n    \"avg_ts\": 9.627099,\n    \"stddev_ts\": 0.016854,\n    \"samples_ns\": [ 53076177158, 53243415415, 53230350170 ],\n    \"samples_ts\": [ 9.64651, 9.61621, 9.61857 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:53:55Z",
+          "avg_ns": 7342258968,
+          "stddev_ns": 1857917,
+          "avg_ts": 69.73331,
+          "stddev_ts": 0.017624,
+          "samples_ns": [
+            7341209554,
+            7341165478,
+            7344401873
+          ],
+          "samples_ts": [
+            69.7433,
+            69.7437,
+            69.713
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:54:25Z",
+          "avg_ns": 53183314247,
+          "stddev_ns": 93013700,
+          "avg_ts": 9.627099,
+          "stddev_ts": 0.016854,
+          "samples_ns": [
+            53076177158,
+            53243415415,
+            53230350170
+          ],
+          "samples_ts": [
+            9.64651,
+            9.61621,
+            9.61857
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 695
+    },
+    {
+      "timestamp_utc": "2025-12-09T10:57:52.499589+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:57:05Z\",\n    \"avg_ns\": 1749937281,\n    \"stddev_ns\": 877845,\n    \"avg_ts\": 73.145491,\n    \"stddev_ts\": 0.036620,\n    \"samples_ns\": [ 1750337758, 1750541299, 1748932788 ],\n    \"samples_ts\": [ 73.1287, 73.1202, 73.1875 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:57:12Z\",\n    \"avg_ns\": 13149563702,\n    \"stddev_ns\": 152752649,\n    \"avg_ts\": 9.735045,\n    \"stddev_ts\": 0.113841,\n    \"samples_ns\": [ 12973451599, 13229148189, 13246091318 ],\n    \"samples_ts\": [ 9.8663, 9.6756, 9.66323 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:57:05Z",
+          "avg_ns": 1749937281,
+          "stddev_ns": 877845,
+          "avg_ts": 73.145491,
+          "stddev_ts": 0.03662,
+          "samples_ns": [
+            1750337758,
+            1750541299,
+            1748932788
+          ],
+          "samples_ts": [
+            73.1287,
+            73.1202,
+            73.1875
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:57:12Z",
+          "avg_ns": 13149563702,
+          "stddev_ns": 152752649,
+          "avg_ts": 9.735045,
+          "stddev_ts": 0.113841,
+          "samples_ns": [
+            12973451599,
+            13229148189,
+            13246091318
+          ],
+          "samples_ts": [
+            9.8663,
+            9.6756,
+            9.66323
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 696
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:00:39.693715+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:57:53Z\",\n    \"avg_ns\": 1750226786,\n    \"stddev_ns\": 1746915,\n    \"avg_ts\": 73.133428,\n    \"stddev_ts\": 0.072983,\n    \"samples_ns\": [ 1752066626, 1750023075, 1748590657 ],\n    \"samples_ts\": [ 73.0566, 73.1419, 73.2018 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T10:58:00Z\",\n    \"avg_ns\": 53035913944,\n    \"stddev_ns\": 223638116,\n    \"avg_ts\": 9.653950,\n    \"stddev_ts\": 0.040706,\n    \"samples_ns\": [ 53033648931, 53260675728, 52813417175 ],\n    \"samples_ts\": [ 9.65425, 9.6131, 9.69451 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:57:53Z",
+          "avg_ns": 1750226786,
+          "stddev_ns": 1746915,
+          "avg_ts": 73.133428,
+          "stddev_ts": 0.072983,
+          "samples_ns": [
+            1752066626,
+            1750023075,
+            1748590657
+          ],
+          "samples_ts": [
+            73.0566,
+            73.1419,
+            73.2018
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T10:58:00Z",
+          "avg_ns": 53035913944,
+          "stddev_ns": 223638116,
+          "avg_ts": 9.65395,
+          "stddev_ts": 0.040706,
+          "samples_ns": [
+            53033648931,
+            53260675728,
+            52813417175
+          ],
+          "samples_ts": [
+            9.65425,
+            9.6131,
+            9.69451
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 697
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:01:48.395864+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:00:40Z\",\n    \"avg_ns\": 7019470427,\n    \"stddev_ns\": 1284640,\n    \"avg_ts\": 72.939977,\n    \"stddev_ts\": 0.013349,\n    \"samples_ns\": [ 7020756532, 7018187256, 7019467493 ],\n    \"samples_ts\": [ 72.9266, 72.9533, 72.94 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:01:08Z\",\n    \"avg_ns\": 13180507405,\n    \"stddev_ns\": 62837116,\n    \"avg_ts\": 9.711458,\n    \"stddev_ts\": 0.046420,\n    \"samples_ns\": [ 13108351234, 13223188916, 13209982066 ],\n    \"samples_ts\": [ 9.76477, 9.67996, 9.68964 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:00:40Z",
+          "avg_ns": 7019470427,
+          "stddev_ns": 1284640,
+          "avg_ts": 72.939977,
+          "stddev_ts": 0.013349,
+          "samples_ns": [
+            7020756532,
+            7018187256,
+            7019467493
+          ],
+          "samples_ts": [
+            72.9266,
+            72.9533,
+            72.94
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:01:08Z",
+          "avg_ns": 13180507405,
+          "stddev_ns": 62837116,
+          "avg_ts": 9.711458,
+          "stddev_ts": 0.04642,
+          "samples_ns": [
+            13108351234,
+            13223188916,
+            13209982066
+          ],
+          "samples_ts": [
+            9.76477,
+            9.67996,
+            9.68964
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 698
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:04:56.124407+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:01:49Z\",\n    \"avg_ns\": 7016584117,\n    \"stddev_ns\": 5701732,\n    \"avg_ts\": 72.970012,\n    \"stddev_ts\": 0.059258,\n    \"samples_ns\": [ 7023088449, 7014207153, 7012456751 ],\n    \"samples_ts\": [ 72.9024, 72.9947, 73.0129 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:02:17Z\",\n    \"avg_ns\": 52854586098,\n    \"stddev_ns\": 458170723,\n    \"avg_ts\": 9.687443,\n    \"stddev_ts\": 0.084377,\n    \"samples_ns\": [ 53166178305, 53069064328, 52328515662 ],\n    \"samples_ts\": [ 9.63018, 9.64781, 9.78434 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:01:49Z",
+          "avg_ns": 7016584117,
+          "stddev_ns": 5701732,
+          "avg_ts": 72.970012,
+          "stddev_ts": 0.059258,
+          "samples_ns": [
+            7023088449,
+            7014207153,
+            7012456751
+          ],
+          "samples_ts": [
+            72.9024,
+            72.9947,
+            73.0129
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:02:17Z",
+          "avg_ns": 52854586098,
+          "stddev_ns": 458170723,
+          "avg_ts": 9.687443,
+          "stddev_ts": 0.084377,
+          "samples_ns": [
+            53166178305,
+            53069064328,
+            52328515662
+          ],
+          "samples_ts": [
+            9.63018,
+            9.64781,
+            9.78434
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 699
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:05:42.333711+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:04:57Z\",\n    \"avg_ns\": 1758325919,\n    \"stddev_ns\": 1284162,\n    \"avg_ts\": 72.796541,\n    \"stddev_ts\": 0.053087,\n    \"samples_ns\": [ 1757573616, 1759807099, 1757597044 ],\n    \"samples_ts\": [ 72.8277, 72.7352, 72.8267 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:05:04Z\",\n    \"avg_ns\": 12683269970,\n    \"stddev_ns\": 26818526,\n    \"avg_ts\": 10.092065,\n    \"stddev_ts\": 0.021365,\n    \"samples_ns\": [ 12699470445, 12698025071, 12652314396 ],\n    \"samples_ts\": [ 10.0792, 10.0803, 10.1167 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:04:57Z",
+          "avg_ns": 1758325919,
+          "stddev_ns": 1284162,
+          "avg_ts": 72.796541,
+          "stddev_ts": 0.053087,
+          "samples_ns": [
+            1757573616,
+            1759807099,
+            1757597044
+          ],
+          "samples_ts": [
+            72.8277,
+            72.7352,
+            72.8267
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:05:04Z",
+          "avg_ns": 12683269970,
+          "stddev_ns": 26818526,
+          "avg_ts": 10.092065,
+          "stddev_ts": 0.021365,
+          "samples_ns": [
+            12699470445,
+            12698025071,
+            12652314396
+          ],
+          "samples_ts": [
+            10.0792,
+            10.0803,
+            10.1167
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 700
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:08:24.232798+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:05:43Z\",\n    \"avg_ns\": 1752107690,\n    \"stddev_ns\": 1228524,\n    \"avg_ts\": 73.054894,\n    \"stddev_ts\": 0.051185,\n    \"samples_ns\": [ 1750694709, 1752722634, 1752905729 ],\n    \"samples_ts\": [ 73.1138, 73.0292, 73.0216 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:05:50Z\",\n    \"avg_ns\": 51269782210,\n    \"stddev_ns\": 11826268,\n    \"avg_ts\": 9.986390,\n    \"stddev_ts\": 0.002304,\n    \"samples_ns\": [ 51276200011, 51277012155, 51256134464 ],\n    \"samples_ts\": [ 9.98514, 9.98498, 9.98905 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:05:43Z",
+          "avg_ns": 1752107690,
+          "stddev_ns": 1228524,
+          "avg_ts": 73.054894,
+          "stddev_ts": 0.051185,
+          "samples_ns": [
+            1750694709,
+            1752722634,
+            1752905729
+          ],
+          "samples_ts": [
+            73.1138,
+            73.0292,
+            73.0216
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:05:50Z",
+          "avg_ns": 51269782210,
+          "stddev_ns": 11826268,
+          "avg_ts": 9.98639,
+          "stddev_ts": 0.002304,
+          "samples_ns": [
+            51276200011,
+            51277012155,
+            51256134464
+          ],
+          "samples_ts": [
+            9.98514,
+            9.98498,
+            9.98905
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 701
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:09:33.080535+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:08:25Z\",\n    \"avg_ns\": 7034377102,\n    \"stddev_ns\": 3431274,\n    \"avg_ts\": 72.785418,\n    \"stddev_ts\": 0.035496,\n    \"samples_ns\": [ 7038236848, 7033222092, 7031672366 ],\n    \"samples_ts\": [ 72.7455, 72.7974, 72.8134 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:08:53Z\",\n    \"avg_ns\": 13207981928,\n    \"stddev_ns\": 57774009,\n    \"avg_ts\": 9.691233,\n    \"stddev_ts\": 0.042486,\n    \"samples_ns\": [ 13142110506, 13231777630, 13250057649 ],\n    \"samples_ts\": [ 9.73968, 9.67368, 9.66034 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:08:25Z",
+          "avg_ns": 7034377102,
+          "stddev_ns": 3431274,
+          "avg_ts": 72.785418,
+          "stddev_ts": 0.035496,
+          "samples_ns": [
+            7038236848,
+            7033222092,
+            7031672366
+          ],
+          "samples_ts": [
+            72.7455,
+            72.7974,
+            72.8134
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:08:53Z",
+          "avg_ns": 13207981928,
+          "stddev_ns": 57774009,
+          "avg_ts": 9.691233,
+          "stddev_ts": 0.042486,
+          "samples_ns": [
+            13142110506,
+            13231777630,
+            13250057649
+          ],
+          "samples_ts": [
+            9.73968,
+            9.67368,
+            9.66034
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 702
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:12:40.785952+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:09:33Z\",\n    \"avg_ns\": 7032617048,\n    \"stddev_ns\": 1727925,\n    \"avg_ts\": 72.803626,\n    \"stddev_ts\": 0.017844,\n    \"samples_ns\": [ 7031326023, 7031950463, 7034574660 ],\n    \"samples_ts\": [ 72.817, 72.8105, 72.7834 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:10:02Z\",\n    \"avg_ns\": 52821912804,\n    \"stddev_ns\": 961701758,\n    \"avg_ts\": 9.695112,\n    \"stddev_ts\": 0.178380,\n    \"samples_ns\": [ 53344729137, 53408954744, 51712054531 ],\n    \"samples_ts\": [ 9.59795, 9.58641, 9.90098 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:09:33Z",
+          "avg_ns": 7032617048,
+          "stddev_ns": 1727925,
+          "avg_ts": 72.803626,
+          "stddev_ts": 0.017844,
+          "samples_ns": [
+            7031326023,
+            7031950463,
+            7034574660
+          ],
+          "samples_ts": [
+            72.817,
+            72.8105,
+            72.7834
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:10:02Z",
+          "avg_ns": 52821912804,
+          "stddev_ns": 961701758,
+          "avg_ts": 9.695112,
+          "stddev_ts": 0.17838,
+          "samples_ns": [
+            53344729137,
+            53408954744,
+            51712054531
+          ],
+          "samples_ts": [
+            9.59795,
+            9.58641,
+            9.90098
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 703
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:13:26.952097+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:12:41Z\",\n    \"avg_ns\": 1752174177,\n    \"stddev_ns\": 1656717,\n    \"avg_ts\": 73.052142,\n    \"stddev_ts\": 0.069107,\n    \"samples_ns\": [ 1752887858, 1750280226, 1753354447 ],\n    \"samples_ts\": [ 73.0224, 73.1311, 73.0029 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:12:48Z\",\n    \"avg_ns\": 12690966157,\n    \"stddev_ns\": 31332573,\n    \"avg_ts\": 10.085955,\n    \"stddev_ts\": 0.024865,\n    \"samples_ns\": [ 12727140645, 12672369354, 12673388474 ],\n    \"samples_ts\": [ 10.0572, 10.1007, 10.0999 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:12:41Z",
+          "avg_ns": 1752174177,
+          "stddev_ns": 1656717,
+          "avg_ts": 73.052142,
+          "stddev_ts": 0.069107,
+          "samples_ns": [
+            1752887858,
+            1750280226,
+            1753354447
+          ],
+          "samples_ts": [
+            73.0224,
+            73.1311,
+            73.0029
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:12:48Z",
+          "avg_ns": 12690966157,
+          "stddev_ns": 31332573,
+          "avg_ts": 10.085955,
+          "stddev_ts": 0.024865,
+          "samples_ns": [
+            12727140645,
+            12672369354,
+            12673388474
+          ],
+          "samples_ts": [
+            10.0572,
+            10.1007,
+            10.0999
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 704
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:16:10.615351+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:13:27Z\",\n    \"avg_ns\": 1747614538,\n    \"stddev_ns\": 733987,\n    \"avg_ts\": 73.242704,\n    \"stddev_ts\": 0.030654,\n    \"samples_ns\": [ 1747128880, 1747258750, 1748455986 ],\n    \"samples_ts\": [ 73.2631, 73.2576, 73.2074 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:13:34Z\",\n    \"avg_ns\": 51856231091,\n    \"stddev_ns\": 599969046,\n    \"avg_ts\": 9.874328,\n    \"stddev_ts\": 0.113559,\n    \"samples_ns\": [ 51600708589, 52541660696, 51426323989 ],\n    \"samples_ts\": [ 9.92234, 9.74465, 9.95599 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:13:27Z",
+          "avg_ns": 1747614538,
+          "stddev_ns": 733987,
+          "avg_ts": 73.242704,
+          "stddev_ts": 0.030654,
+          "samples_ns": [
+            1747128880,
+            1747258750,
+            1748455986
+          ],
+          "samples_ts": [
+            73.2631,
+            73.2576,
+            73.2074
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:13:34Z",
+          "avg_ns": 51856231091,
+          "stddev_ns": 599969046,
+          "avg_ts": 9.874328,
+          "stddev_ts": 0.113559,
+          "samples_ns": [
+            51600708589,
+            52541660696,
+            51426323989
+          ],
+          "samples_ts": [
+            9.92234,
+            9.74465,
+            9.95599
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 705
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:17:20.841509+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:16:11Z\",\n    \"avg_ns\": 7332662880,\n    \"stddev_ns\": 2510126,\n    \"avg_ts\": 69.824571,\n    \"stddev_ts\": 0.023879,\n    \"samples_ns\": [ 7329781311, 7333861826, 7334345505 ],\n    \"samples_ts\": [ 69.852, 69.8132, 69.8085 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:16:40Z\",\n    \"avg_ns\": 13265898169,\n    \"stddev_ns\": 16672099,\n    \"avg_ts\": 9.648810,\n    \"stddev_ts\": 0.012133,\n    \"samples_ns\": [ 13247236873, 13279324660, 13271132974 ],\n    \"samples_ts\": [ 9.66239, 9.63904, 9.64499 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:16:11Z",
+          "avg_ns": 7332662880,
+          "stddev_ns": 2510126,
+          "avg_ts": 69.824571,
+          "stddev_ts": 0.023879,
+          "samples_ns": [
+            7329781311,
+            7333861826,
+            7334345505
+          ],
+          "samples_ts": [
+            69.852,
+            69.8132,
+            69.8085
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:16:40Z",
+          "avg_ns": 13265898169,
+          "stddev_ns": 16672099,
+          "avg_ts": 9.64881,
+          "stddev_ts": 0.012133,
+          "samples_ns": [
+            13247236873,
+            13279324660,
+            13271132974
+          ],
+          "samples_ts": [
+            9.66239,
+            9.63904,
+            9.64499
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 706
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:20:30.711688+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:17:21Z\",\n    \"avg_ns\": 7333705304,\n    \"stddev_ns\": 4249077,\n    \"avg_ts\": 69.814656,\n    \"stddev_ts\": 0.040433,\n    \"samples_ns\": [ 7338412766, 7330156929, 7332546218 ],\n    \"samples_ts\": [ 69.7699, 69.8484, 69.8257 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:17:51Z\",\n    \"avg_ns\": 53136037074,\n    \"stddev_ns\": 87931925,\n    \"avg_ts\": 9.635663,\n    \"stddev_ts\": 0.015942,\n    \"samples_ns\": [ 53128420418, 53227529571, 53052161233 ],\n    \"samples_ts\": [ 9.63703, 9.61908, 9.65088 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:17:21Z",
+          "avg_ns": 7333705304,
+          "stddev_ns": 4249077,
+          "avg_ts": 69.814656,
+          "stddev_ts": 0.040433,
+          "samples_ns": [
+            7338412766,
+            7330156929,
+            7332546218
+          ],
+          "samples_ts": [
+            69.7699,
+            69.8484,
+            69.8257
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:17:51Z",
+          "avg_ns": 53136037074,
+          "stddev_ns": 87931925,
+          "avg_ts": 9.635663,
+          "stddev_ts": 0.015942,
+          "samples_ns": [
+            53128420418,
+            53227529571,
+            53052161233
+          ],
+          "samples_ts": [
+            9.63703,
+            9.61908,
+            9.65088
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 707
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:21:18.014710+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:20:31Z\",\n    \"avg_ns\": 1759353249,\n    \"stddev_ns\": 1541222,\n    \"avg_ts\": 72.754045,\n    \"stddev_ts\": 0.063687,\n    \"samples_ns\": [ 1758869976, 1761077601, 1758112171 ],\n    \"samples_ts\": [ 72.774, 72.6828, 72.8054 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:20:38Z\",\n    \"avg_ns\": 13057839779,\n    \"stddev_ns\": 162042293,\n    \"avg_ts\": 9.803554,\n    \"stddev_ts\": 0.122509,\n    \"samples_ns\": [ 12871374565, 13164516152, 13137628621 ],\n    \"samples_ts\": [ 9.94455, 9.72311, 9.74301 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:20:31Z",
+          "avg_ns": 1759353249,
+          "stddev_ns": 1541222,
+          "avg_ts": 72.754045,
+          "stddev_ts": 0.063687,
+          "samples_ns": [
+            1758869976,
+            1761077601,
+            1758112171
+          ],
+          "samples_ts": [
+            72.774,
+            72.6828,
+            72.8054
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:20:38Z",
+          "avg_ns": 13057839779,
+          "stddev_ns": 162042293,
+          "avg_ts": 9.803554,
+          "stddev_ts": 0.122509,
+          "samples_ns": [
+            12871374565,
+            13164516152,
+            13137628621
+          ],
+          "samples_ts": [
+            9.94455,
+            9.72311,
+            9.74301
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 708
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:24:02.838475+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:21:18Z\",\n    \"avg_ns\": 1761057826,\n    \"stddev_ns\": 3141067,\n    \"avg_ts\": 72.683741,\n    \"stddev_ts\": 0.129608,\n    \"samples_ns\": [ 1758075382, 1760761570, 1764336526 ],\n    \"samples_ts\": [ 72.8069, 72.6958, 72.5485 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:21:25Z\",\n    \"avg_ns\": 52230620541,\n    \"stddev_ns\": 819613570,\n    \"avg_ts\": 9.804296,\n    \"stddev_ts\": 0.154614,\n    \"samples_ns\": [ 52947253321, 52407649318, 51336958985 ],\n    \"samples_ts\": [ 9.67, 9.76957, 9.97332 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:21:18Z",
+          "avg_ns": 1761057826,
+          "stddev_ns": 3141067,
+          "avg_ts": 72.683741,
+          "stddev_ts": 0.129608,
+          "samples_ns": [
+            1758075382,
+            1760761570,
+            1764336526
+          ],
+          "samples_ts": [
+            72.8069,
+            72.6958,
+            72.5485
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:21:25Z",
+          "avg_ns": 52230620541,
+          "stddev_ns": 819613570,
+          "avg_ts": 9.804296,
+          "stddev_ts": 0.154614,
+          "samples_ns": [
+            52947253321,
+            52407649318,
+            51336958985
+          ],
+          "samples_ts": [
+            9.67,
+            9.76957,
+            9.97332
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 709
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:25:11.426146+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:24:03Z\",\n    \"avg_ns\": 7032494637,\n    \"stddev_ns\": 2520213,\n    \"avg_ts\": 72.804896,\n    \"stddev_ts\": 0.026057,\n    \"samples_ns\": [ 7030856659, 7035393334, 7031233920 ],\n    \"samples_ts\": [ 72.8219, 72.7749, 72.8179 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:24:31Z\",\n    \"avg_ns\": 13121681900,\n    \"stddev_ns\": 82247686,\n    \"avg_ts\": 9.755103,\n    \"stddev_ts\": 0.061367,\n    \"samples_ns\": [ 13026737340, 13171104436, 13167203926 ],\n    \"samples_ts\": [ 9.82594, 9.71824, 9.72112 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:24:03Z",
+          "avg_ns": 7032494637,
+          "stddev_ns": 2520213,
+          "avg_ts": 72.804896,
+          "stddev_ts": 0.026057,
+          "samples_ns": [
+            7030856659,
+            7035393334,
+            7031233920
+          ],
+          "samples_ts": [
+            72.8219,
+            72.7749,
+            72.8179
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:24:31Z",
+          "avg_ns": 13121681900,
+          "stddev_ns": 82247686,
+          "avg_ts": 9.755103,
+          "stddev_ts": 0.061367,
+          "samples_ns": [
+            13026737340,
+            13171104436,
+            13167203926
+          ],
+          "samples_ts": [
+            9.82594,
+            9.71824,
+            9.72112
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 710
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:28:20.285373+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:25:12Z\",\n    \"avg_ns\": 7029995595,\n    \"stddev_ns\": 2762532,\n    \"avg_ts\": 72.830779,\n    \"stddev_ts\": 0.028613,\n    \"samples_ns\": [ 7033181617, 7028266447, 7028538721 ],\n    \"samples_ts\": [ 72.7978, 72.8487, 72.8459 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:25:40Z\",\n    \"avg_ns\": 53215002274,\n    \"stddev_ns\": 68948605,\n    \"avg_ts\": 9.621358,\n    \"stddev_ts\": 0.012475,\n    \"samples_ns\": [ 53135803391, 53247569191, 53261634242 ],\n    \"samples_ts\": [ 9.63569, 9.61546, 9.61292 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:25:12Z",
+          "avg_ns": 7029995595,
+          "stddev_ns": 2762532,
+          "avg_ts": 72.830779,
+          "stddev_ts": 0.028613,
+          "samples_ns": [
+            7033181617,
+            7028266447,
+            7028538721
+          ],
+          "samples_ts": [
+            72.7978,
+            72.8487,
+            72.8459
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:25:40Z",
+          "avg_ns": 53215002274,
+          "stddev_ns": 68948605,
+          "avg_ts": 9.621358,
+          "stddev_ts": 0.012475,
+          "samples_ns": [
+            53135803391,
+            53247569191,
+            53261634242
+          ],
+          "samples_ts": [
+            9.63569,
+            9.61546,
+            9.61292
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 711
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:29:07.817672+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:28:21Z\",\n    \"avg_ns\": 1753749821,\n    \"stddev_ns\": 372872,\n    \"avg_ts\": 72.986467,\n    \"stddev_ts\": 0.015518,\n    \"samples_ns\": [ 1753772911, 1753365940, 1754110612 ],\n    \"samples_ts\": [ 72.9855, 73.0024, 72.9715 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:28:28Z\",\n    \"avg_ns\": 13142420216,\n    \"stddev_ns\": 153019418,\n    \"avg_ts\": 9.740340,\n    \"stddev_ts\": 0.114176,\n    \"samples_ns\": [ 12965733681, 13229615783, 13231911186 ],\n    \"samples_ts\": [ 9.87218, 9.67526, 9.67358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:28:21Z",
+          "avg_ns": 1753749821,
+          "stddev_ns": 372872,
+          "avg_ts": 72.986467,
+          "stddev_ts": 0.015518,
+          "samples_ns": [
+            1753772911,
+            1753365940,
+            1754110612
+          ],
+          "samples_ts": [
+            72.9855,
+            73.0024,
+            72.9715
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:28:28Z",
+          "avg_ns": 13142420216,
+          "stddev_ns": 153019418,
+          "avg_ts": 9.74034,
+          "stddev_ts": 0.114176,
+          "samples_ns": [
+            12965733681,
+            13229615783,
+            13231911186
+          ],
+          "samples_ts": [
+            9.87218,
+            9.67526,
+            9.67358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 712
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:31:52.119323+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:29:08Z\",\n    \"avg_ns\": 1756596793,\n    \"stddev_ns\": 1263263,\n    \"avg_ts\": 72.868199,\n    \"stddev_ts\": 0.052349,\n    \"samples_ns\": [ 1757823636, 1755302590, 1756664155 ],\n    \"samples_ts\": [ 72.8173, 72.9219, 72.8654 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:29:15Z\",\n    \"avg_ns\": 52059580331,\n    \"stddev_ns\": 822595033,\n    \"avg_ts\": 9.836527,\n    \"stddev_ts\": 0.155870,\n    \"samples_ns\": [ 52829965096, 52155577124, 51193198774 ],\n    \"samples_ts\": [ 9.69147, 9.81678, 10.0013 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:29:08Z",
+          "avg_ns": 1756596793,
+          "stddev_ns": 1263263,
+          "avg_ts": 72.868199,
+          "stddev_ts": 0.052349,
+          "samples_ns": [
+            1757823636,
+            1755302590,
+            1756664155
+          ],
+          "samples_ts": [
+            72.8173,
+            72.9219,
+            72.8654
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:29:15Z",
+          "avg_ns": 52059580331,
+          "stddev_ns": 822595033,
+          "avg_ts": 9.836527,
+          "stddev_ts": 0.15587,
+          "samples_ns": [
+            52829965096,
+            52155577124,
+            51193198774
+          ],
+          "samples_ts": [
+            9.69147,
+            9.81678,
+            10.0013
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 713
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:33:00.937217+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:31:53Z\",\n    \"avg_ns\": 7051597892,\n    \"stddev_ns\": 3072229,\n    \"avg_ts\": 72.607666,\n    \"stddev_ts\": 0.031620,\n    \"samples_ns\": [ 7054825030, 7048711246, 7051257401 ],\n    \"samples_ts\": [ 72.5744, 72.6374, 72.6112 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:32:21Z\",\n    \"avg_ns\": 13163435010,\n    \"stddev_ns\": 74467810,\n    \"avg_ts\": 9.724114,\n    \"stddev_ts\": 0.055168,\n    \"samples_ns\": [ 13078701320, 13218474383, 13193129328 ],\n    \"samples_ts\": [ 9.7869, 9.68342, 9.70202 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:31:53Z",
+          "avg_ns": 7051597892,
+          "stddev_ns": 3072229,
+          "avg_ts": 72.607666,
+          "stddev_ts": 0.03162,
+          "samples_ns": [
+            7054825030,
+            7048711246,
+            7051257401
+          ],
+          "samples_ts": [
+            72.5744,
+            72.6374,
+            72.6112
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:32:21Z",
+          "avg_ns": 13163435010,
+          "stddev_ns": 74467810,
+          "avg_ts": 9.724114,
+          "stddev_ts": 0.055168,
+          "samples_ns": [
+            13078701320,
+            13218474383,
+            13193129328
+          ],
+          "samples_ts": [
+            9.7869,
+            9.68342,
+            9.70202
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 714
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:36:09.064996+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:33:01Z\",\n    \"avg_ns\": 7043573336,\n    \"stddev_ns\": 3512732,\n    \"avg_ts\": 72.690389,\n    \"stddev_ts\": 0.036247,\n    \"samples_ns\": [ 7047351480, 7040406175, 7042962353 ],\n    \"samples_ts\": [ 72.6514, 72.7231, 72.6967 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:33:30Z\",\n    \"avg_ns\": 52947628174,\n    \"stddev_ns\": 608563264,\n    \"avg_ts\": 9.670790,\n    \"stddev_ts\": 0.111893,\n    \"samples_ns\": [ 53281989703, 53315705215, 52245189606 ],\n    \"samples_ts\": [ 9.60925, 9.60317, 9.79995 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:33:01Z",
+          "avg_ns": 7043573336,
+          "stddev_ns": 3512732,
+          "avg_ts": 72.690389,
+          "stddev_ts": 0.036247,
+          "samples_ns": [
+            7047351480,
+            7040406175,
+            7042962353
+          ],
+          "samples_ts": [
+            72.6514,
+            72.7231,
+            72.6967
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:33:30Z",
+          "avg_ns": 52947628174,
+          "stddev_ns": 608563264,
+          "avg_ts": 9.67079,
+          "stddev_ts": 0.111893,
+          "samples_ns": [
+            53281989703,
+            53315705215,
+            52245189606
+          ],
+          "samples_ts": [
+            9.60925,
+            9.60317,
+            9.79995
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 715
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:36:55.219727+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:36:09Z\",\n    \"avg_ns\": 1758172778,\n    \"stddev_ns\": 1064741,\n    \"avg_ts\": 72.802874,\n    \"stddev_ts\": 0.044046,\n    \"samples_ns\": [ 1759334853, 1757246628, 1757936854 ],\n    \"samples_ts\": [ 72.7548, 72.8412, 72.8126 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:36:16Z\",\n    \"avg_ns\": 12680027422,\n    \"stddev_ns\": 14121531,\n    \"avg_ts\": 10.094624,\n    \"stddev_ts\": 0.011249,\n    \"samples_ns\": [ 12689478456, 12663794802, 12686809009 ],\n    \"samples_ts\": [ 10.0871, 10.1076, 10.0892 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:36:09Z",
+          "avg_ns": 1758172778,
+          "stddev_ns": 1064741,
+          "avg_ts": 72.802874,
+          "stddev_ts": 0.044046,
+          "samples_ns": [
+            1759334853,
+            1757246628,
+            1757936854
+          ],
+          "samples_ts": [
+            72.7548,
+            72.8412,
+            72.8126
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:36:16Z",
+          "avg_ns": 12680027422,
+          "stddev_ns": 14121531,
+          "avg_ts": 10.094624,
+          "stddev_ts": 0.011249,
+          "samples_ns": [
+            12689478456,
+            12663794802,
+            12686809009
+          ],
+          "samples_ts": [
+            10.0871,
+            10.1076,
+            10.0892
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 716
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:39:37.376667+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:36:56Z\",\n    \"avg_ns\": 1753012068,\n    \"stddev_ns\": 1167936,\n    \"avg_ts\": 73.017203,\n    \"stddev_ts\": 0.048638,\n    \"samples_ns\": [ 1752784234, 1751974836, 1754277134 ],\n    \"samples_ts\": [ 73.0267, 73.0604, 72.9645 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:37:03Z\",\n    \"avg_ns\": 51339888895,\n    \"stddev_ns\": 116856046,\n    \"avg_ts\": 9.972787,\n    \"stddev_ts\": 0.022671,\n    \"samples_ns\": [ 51473850126, 51258905575, 51286910985 ],\n    \"samples_ts\": [ 9.9468, 9.98851, 9.98305 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:36:56Z",
+          "avg_ns": 1753012068,
+          "stddev_ns": 1167936,
+          "avg_ts": 73.017203,
+          "stddev_ts": 0.048638,
+          "samples_ns": [
+            1752784234,
+            1751974836,
+            1754277134
+          ],
+          "samples_ts": [
+            73.0267,
+            73.0604,
+            72.9645
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:37:03Z",
+          "avg_ns": 51339888895,
+          "stddev_ns": 116856046,
+          "avg_ts": 9.972787,
+          "stddev_ts": 0.022671,
+          "samples_ns": [
+            51473850126,
+            51258905575,
+            51286910985
+          ],
+          "samples_ts": [
+            9.9468,
+            9.98851,
+            9.98305
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 717
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:40:47.509198+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:39:38Z\",\n    \"avg_ns\": 7350694791,\n    \"stddev_ns\": 11634054,\n    \"avg_ts\": 69.653396,\n    \"stddev_ts\": 0.110139,\n    \"samples_ns\": [ 7344518039, 7364114173, 7343452162 ],\n    \"samples_ts\": [ 69.7119, 69.5264, 69.722 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:40:07Z\",\n    \"avg_ns\": 13215382182,\n    \"stddev_ns\": 65201268,\n    \"avg_ts\": 9.685840,\n    \"stddev_ts\": 0.047761,\n    \"samples_ns\": [ 13152778212, 13210466271, 13282902065 ],\n    \"samples_ts\": [ 9.73178, 9.68929, 9.63645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:39:38Z",
+          "avg_ns": 7350694791,
+          "stddev_ns": 11634054,
+          "avg_ts": 69.653396,
+          "stddev_ts": 0.110139,
+          "samples_ns": [
+            7344518039,
+            7364114173,
+            7343452162
+          ],
+          "samples_ts": [
+            69.7119,
+            69.5264,
+            69.722
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:40:07Z",
+          "avg_ns": 13215382182,
+          "stddev_ns": 65201268,
+          "avg_ts": 9.68584,
+          "stddev_ts": 0.047761,
+          "samples_ns": [
+            13152778212,
+            13210466271,
+            13282902065
+          ],
+          "samples_ts": [
+            9.73178,
+            9.68929,
+            9.63645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 718
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:43:54.983602+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:40:48Z\",\n    \"avg_ns\": 7351543765,\n    \"stddev_ns\": 3319393,\n    \"avg_ts\": 69.645245,\n    \"stddev_ts\": 0.031440,\n    \"samples_ns\": [ 7348873877, 7350497082, 7355260336 ],\n    \"samples_ts\": [ 69.6705, 69.6552, 69.61 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 1B Q8_0\",\n    \"model_size\": 1062773248,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:41:17Z\",\n    \"avg_ns\": 52318347495,\n    \"stddev_ns\": 983930251,\n    \"avg_ts\": 9.788556,\n    \"stddev_ts\": 0.184584,\n    \"samples_ns\": [ 53255080452, 52406772865, 51293189170 ],\n    \"samples_ts\": [ 9.61411, 9.76973, 9.98183 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:40:48Z",
+          "avg_ns": 7351543765,
+          "stddev_ns": 3319393,
+          "avg_ts": 69.645245,
+          "stddev_ts": 0.03144,
+          "samples_ns": [
+            7348873877,
+            7350497082,
+            7355260336
+          ],
+          "samples_ts": [
+            69.6705,
+            69.6552,
+            69.61
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+          "model_type": "gemma3 1B Q8_0",
+          "model_size": 1062773248,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:41:17Z",
+          "avg_ns": 52318347495,
+          "stddev_ns": 983930251,
+          "avg_ts": 9.788556,
+          "stddev_ts": 0.184584,
+          "samples_ns": [
+            53255080452,
+            52406772865,
+            51293189170
+          ],
+          "samples_ts": [
+            9.61411,
+            9.76973,
+            9.98183
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 719
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:45:06.659897+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:44:03Z\",\n    \"avg_ns\": 6346378053,\n    \"stddev_ns\": 13945955,\n    \"avg_ts\": 20.169049,\n    \"stddev_ts\": 0.044352,\n    \"samples_ns\": [ 6358367139, 6349693363, 6331073659 ],\n    \"samples_ts\": [ 20.131, 20.1585, 20.2177 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:44:29Z\",\n    \"avg_ns\": 12244123042,\n    \"stddev_ns\": 19521728,\n    \"avg_ts\": 10.454013,\n    \"stddev_ts\": 0.016668,\n    \"samples_ns\": [ 12263454178, 12244498803, 12224416145 ],\n    \"samples_ts\": [ 10.4375, 10.4537, 10.4708 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:44:03Z",
+          "avg_ns": 6346378053,
+          "stddev_ns": 13945955,
+          "avg_ts": 20.169049,
+          "stddev_ts": 0.044352,
+          "samples_ns": [
+            6358367139,
+            6349693363,
+            6331073659
+          ],
+          "samples_ts": [
+            20.131,
+            20.1585,
+            20.2177
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:44:29Z",
+          "avg_ns": 12244123042,
+          "stddev_ns": 19521728,
+          "avg_ts": 10.454013,
+          "stddev_ts": 0.016668,
+          "samples_ns": [
+            12263454178,
+            12244498803,
+            12224416145
+          ],
+          "samples_ts": [
+            10.4375,
+            10.4537,
+            10.4708
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 720
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:48:02.209748+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:45:07Z\",\n    \"avg_ns\": 6331826699,\n    \"stddev_ns\": 4810396,\n    \"avg_ts\": 20.215343,\n    \"stddev_ts\": 0.015362,\n    \"samples_ns\": [ 6333965062, 6335196452, 6326318584 ],\n    \"samples_ts\": [ 20.2085, 20.2046, 20.2329 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:45:33Z\",\n    \"avg_ns\": 49662778311,\n    \"stddev_ns\": 3104579,\n    \"avg_ts\": 10.309532,\n    \"stddev_ts\": 0.000641,\n    \"samples_ns\": [ 49664693724, 49659215325, 49664425886 ],\n    \"samples_ts\": [ 10.3091, 10.3103, 10.3092 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:45:07Z",
+          "avg_ns": 6331826699,
+          "stddev_ns": 4810396,
+          "avg_ts": 20.215343,
+          "stddev_ts": 0.015362,
+          "samples_ns": [
+            6333965062,
+            6335196452,
+            6326318584
+          ],
+          "samples_ts": [
+            20.2085,
+            20.2046,
+            20.2329
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:45:33Z",
+          "avg_ns": 49662778311,
+          "stddev_ns": 3104579,
+          "avg_ts": 10.309532,
+          "stddev_ts": 0.000641,
+          "samples_ns": [
+            49664693724,
+            49659215325,
+            49664425886
+          ],
+          "samples_ts": [
+            10.3091,
+            10.3103,
+            10.3092
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 721
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:50:22.090604+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:48:03Z\",\n    \"avg_ns\": 25544015100,\n    \"stddev_ns\": 658879,\n    \"avg_ts\": 20.043834,\n    \"stddev_ts\": 0.000502,\n    \"samples_ns\": [ 25544738562, 25543779983, 25543526756 ],\n    \"samples_ts\": [ 20.0433, 20.044, 20.0442 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:49:45Z\",\n    \"avg_ns\": 12176544387,\n    \"stddev_ns\": 1586433,\n    \"avg_ts\": 10.512014,\n    \"stddev_ts\": 0.001363,\n    \"samples_ns\": [ 12175160575, 12176208570, 12178264018 ],\n    \"samples_ts\": [ 10.5132, 10.5123, 10.5105 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:48:03Z",
+          "avg_ns": 25544015100,
+          "stddev_ns": 658879,
+          "avg_ts": 20.043834,
+          "stddev_ts": 0.000502,
+          "samples_ns": [
+            25544738562,
+            25543779983,
+            25543526756
+          ],
+          "samples_ts": [
+            20.0433,
+            20.044,
+            20.0442
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:49:45Z",
+          "avg_ns": 12176544387,
+          "stddev_ns": 1586433,
+          "avg_ts": 10.512014,
+          "stddev_ts": 0.001363,
+          "samples_ns": [
+            12175160575,
+            12176208570,
+            12178264018
+          ],
+          "samples_ts": [
+            10.5132,
+            10.5123,
+            10.5105
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 722
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:54:33.230675+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:50:23Z\",\n    \"avg_ns\": 25543222025,\n    \"stddev_ns\": 619979,\n    \"avg_ts\": 20.044456,\n    \"stddev_ts\": 0.000453,\n    \"samples_ns\": [ 25543620284, 25543485855, 25542559938 ],\n    \"samples_ts\": [ 20.0441, 20.0442, 20.045 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:52:05Z\",\n    \"avg_ns\": 49272009652,\n    \"stddev_ns\": 3175031,\n    \"avg_ts\": 10.391295,\n    \"stddev_ts\": 0.000666,\n    \"samples_ns\": [ 49275653728, 49270338678, 49270036552 ],\n    \"samples_ts\": [ 10.3905, 10.3916, 10.3917 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:50:23Z",
+          "avg_ns": 25543222025,
+          "stddev_ns": 619979,
+          "avg_ts": 20.044456,
+          "stddev_ts": 0.000453,
+          "samples_ns": [
+            25543620284,
+            25543485855,
+            25542559938
+          ],
+          "samples_ts": [
+            20.0441,
+            20.0442,
+            20.045
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:52:05Z",
+          "avg_ns": 49272009652,
+          "stddev_ns": 3175031,
+          "avg_ts": 10.391295,
+          "stddev_ts": 0.000666,
+          "samples_ns": [
+            49275653728,
+            49270338678,
+            49270036552
+          ],
+          "samples_ts": [
+            10.3905,
+            10.3916,
+            10.3917
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 723
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:55:36.182631+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:54:34Z\",\n    \"avg_ns\": 6327770427,\n    \"stddev_ns\": 191981,\n    \"avg_ts\": 20.228294,\n    \"stddev_ts\": 0.000559,\n    \"samples_ns\": [ 6327583392, 6327798428, 6327929462 ],\n    \"samples_ts\": [ 20.2289, 20.2282, 20.2278 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:54:59Z\",\n    \"avg_ns\": 12163340138,\n    \"stddev_ns\": 1391301,\n    \"avg_ts\": 10.523425,\n    \"stddev_ts\": 0.001200,\n    \"samples_ns\": [ 12163948184, 12164319185, 12161753046 ],\n    \"samples_ts\": [ 10.5229, 10.5226, 10.5248 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:54:34Z",
+          "avg_ns": 6327770427,
+          "stddev_ns": 191981,
+          "avg_ts": 20.228294,
+          "stddev_ts": 0.000559,
+          "samples_ns": [
+            6327583392,
+            6327798428,
+            6327929462
+          ],
+          "samples_ts": [
+            20.2289,
+            20.2282,
+            20.2278
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:54:59Z",
+          "avg_ns": 12163340138,
+          "stddev_ns": 1391301,
+          "avg_ts": 10.523425,
+          "stddev_ts": 0.0012,
+          "samples_ns": [
+            12163948184,
+            12164319185,
+            12161753046
+          ],
+          "samples_ts": [
+            10.5229,
+            10.5226,
+            10.5248
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 724
+    },
+    {
+      "timestamp_utc": "2025-12-09T11:58:31.868082+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:55:37Z\",\n    \"avg_ns\": 6327754911,\n    \"stddev_ns\": 110503,\n    \"avg_ts\": 20.228344,\n    \"stddev_ts\": 0.000245,\n    \"samples_ns\": [ 6327843368, 6327706825, 6327714541 ],\n    \"samples_ts\": [ 20.2281, 20.2285, 20.2285 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:56:02Z\",\n    \"avg_ns\": 49741928123,\n    \"stddev_ns\": 490280,\n    \"avg_ts\": 10.293127,\n    \"stddev_ts\": 0.000101,\n    \"samples_ns\": [ 49742480688, 49741758514, 49741545167 ],\n    \"samples_ts\": [ 10.293, 10.2932, 10.2932 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:55:37Z",
+          "avg_ns": 6327754911,
+          "stddev_ns": 110503,
+          "avg_ts": 20.228344,
+          "stddev_ts": 0.000245,
+          "samples_ns": [
+            6327843368,
+            6327706825,
+            6327714541
+          ],
+          "samples_ts": [
+            20.2281,
+            20.2285,
+            20.2285
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:56:02Z",
+          "avg_ns": 49741928123,
+          "stddev_ns": 490280,
+          "avg_ts": 10.293127,
+          "stddev_ts": 0.000101,
+          "samples_ns": [
+            49742480688,
+            49741758514,
+            49741545167
+          ],
+          "samples_ts": [
+            10.293,
+            10.2932,
+            10.2932
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 725
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:00:51.978038+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T11:58:32Z\",\n    \"avg_ns\": 25580859037,\n    \"stddev_ns\": 716685,\n    \"avg_ts\": 20.014965,\n    \"stddev_ts\": 0.000561,\n    \"samples_ns\": [ 25580960552, 25581519552, 25580097007 ],\n    \"samples_ts\": [ 20.0149, 20.0144, 20.0156 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:00:15Z\",\n    \"avg_ns\": 12210325221,\n    \"stddev_ns\": 2100318,\n    \"avg_ts\": 10.482931,\n    \"stddev_ts\": 0.001798,\n    \"samples_ns\": [ 12212003122, 12210994716, 12207977827 ],\n    \"samples_ts\": [ 10.4815, 10.4824, 10.4849 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T11:58:32Z",
+          "avg_ns": 25580859037,
+          "stddev_ns": 716685,
+          "avg_ts": 20.014965,
+          "stddev_ts": 0.000561,
+          "samples_ns": [
+            25580960552,
+            25581519552,
+            25580097007
+          ],
+          "samples_ts": [
+            20.0149,
+            20.0144,
+            20.0156
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:00:15Z",
+          "avg_ns": 12210325221,
+          "stddev_ns": 2100318,
+          "avg_ts": 10.482931,
+          "stddev_ts": 0.001798,
+          "samples_ns": [
+            12212003122,
+            12210994716,
+            12207977827
+          ],
+          "samples_ts": [
+            10.4815,
+            10.4824,
+            10.4849
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 726
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:05:02.869931+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:00:52Z\",\n    \"avg_ns\": 25574412178,\n    \"stddev_ns\": 462044,\n    \"avg_ts\": 20.020010,\n    \"stddev_ts\": 0.000362,\n    \"samples_ns\": [ 25574596506, 25573886421, 25574753607 ],\n    \"samples_ts\": [ 20.0199, 20.0204, 20.0197 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:02:35Z\",\n    \"avg_ns\": 49139839282,\n    \"stddev_ns\": 1951498,\n    \"avg_ts\": 10.419245,\n    \"stddev_ts\": 0.000414,\n    \"samples_ns\": [ 49141822625, 49139773952, 49137921269 ],\n    \"samples_ts\": [ 10.4188, 10.4193, 10.4197 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:00:52Z",
+          "avg_ns": 25574412178,
+          "stddev_ns": 462044,
+          "avg_ts": 20.02001,
+          "stddev_ts": 0.000362,
+          "samples_ns": [
+            25574596506,
+            25573886421,
+            25574753607
+          ],
+          "samples_ts": [
+            20.0199,
+            20.0204,
+            20.0197
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:02:35Z",
+          "avg_ns": 49139839282,
+          "stddev_ns": 1951498,
+          "avg_ts": 10.419245,
+          "stddev_ts": 0.000414,
+          "samples_ns": [
+            49141822625,
+            49139773952,
+            49137921269
+          ],
+          "samples_ts": [
+            10.4188,
+            10.4193,
+            10.4197
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 727
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:06:05.921909+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:05:03Z\",\n    \"avg_ns\": 6327509304,\n    \"stddev_ns\": 491712,\n    \"avg_ts\": 20.229129,\n    \"stddev_ts\": 0.001572,\n    \"samples_ns\": [ 6327170377, 6327284271, 6328073264 ],\n    \"samples_ts\": [ 20.2302, 20.2298, 20.2273 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:05:29Z\",\n    \"avg_ns\": 12189598281,\n    \"stddev_ns\": 2753148,\n    \"avg_ts\": 10.500757,\n    \"stddev_ts\": 0.002372,\n    \"samples_ns\": [ 12192417699, 12186916571, 12189460573 ],\n    \"samples_ts\": [ 10.4983, 10.5031, 10.5009 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:05:03Z",
+          "avg_ns": 6327509304,
+          "stddev_ns": 491712,
+          "avg_ts": 20.229129,
+          "stddev_ts": 0.001572,
+          "samples_ns": [
+            6327170377,
+            6327284271,
+            6328073264
+          ],
+          "samples_ts": [
+            20.2302,
+            20.2298,
+            20.2273
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:05:29Z",
+          "avg_ns": 12189598281,
+          "stddev_ns": 2753148,
+          "avg_ts": 10.500757,
+          "stddev_ts": 0.002372,
+          "samples_ns": [
+            12192417699,
+            12186916571,
+            12189460573
+          ],
+          "samples_ts": [
+            10.4983,
+            10.5031,
+            10.5009
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 728
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:09:00.094783+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:06:06Z\",\n    \"avg_ns\": 6328959606,\n    \"stddev_ns\": 275609,\n    \"avg_ts\": 20.224493,\n    \"stddev_ts\": 0.000881,\n    \"samples_ns\": [ 6328750878, 6328855920, 6329272020 ],\n    \"samples_ts\": [ 20.2252, 20.2248, 20.2235 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:06:32Z\",\n    \"avg_ns\": 49227996245,\n    \"stddev_ns\": 1757254,\n    \"avg_ts\": 10.400586,\n    \"stddev_ts\": 0.000365,\n    \"samples_ns\": [ 49229603882, 49228217667, 49226167188 ],\n    \"samples_ts\": [ 10.4002, 10.4005, 10.401 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:06:06Z",
+          "avg_ns": 6328959606,
+          "stddev_ns": 275609,
+          "avg_ts": 20.224493,
+          "stddev_ts": 0.000881,
+          "samples_ns": [
+            6328750878,
+            6328855920,
+            6329272020
+          ],
+          "samples_ts": [
+            20.2252,
+            20.2248,
+            20.2235
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:06:32Z",
+          "avg_ns": 49227996245,
+          "stddev_ns": 1757254,
+          "avg_ts": 10.400586,
+          "stddev_ts": 0.000365,
+          "samples_ns": [
+            49229603882,
+            49228217667,
+            49226167188
+          ],
+          "samples_ts": [
+            10.4002,
+            10.4005,
+            10.401
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 729
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:11:21.597220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:09:01Z\",\n    \"avg_ns\": 25958183407,\n    \"stddev_ns\": 682599,\n    \"avg_ts\": 19.724030,\n    \"stddev_ts\": 0.000519,\n    \"samples_ns\": [ 25957563020, 25958914649, 25958072552 ],\n    \"samples_ts\": [ 19.7245, 19.7235, 19.7241 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:10:44Z\",\n    \"avg_ns\": 12153636676,\n    \"stddev_ns\": 3233643,\n    \"avg_ts\": 10.531828,\n    \"stddev_ts\": 0.002799,\n    \"samples_ns\": [ 12156077086, 12154858898, 12149974046 ],\n    \"samples_ts\": [ 10.5297, 10.5308, 10.535 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:09:01Z",
+          "avg_ns": 25958183407,
+          "stddev_ns": 682599,
+          "avg_ts": 19.72403,
+          "stddev_ts": 0.000519,
+          "samples_ns": [
+            25957563020,
+            25958914649,
+            25958072552
+          ],
+          "samples_ts": [
+            19.7245,
+            19.7235,
+            19.7241
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:10:44Z",
+          "avg_ns": 12153636676,
+          "stddev_ns": 3233643,
+          "avg_ts": 10.531828,
+          "stddev_ts": 0.002799,
+          "samples_ns": [
+            12156077086,
+            12154858898,
+            12149974046
+          ],
+          "samples_ts": [
+            10.5297,
+            10.5308,
+            10.535
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 730
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:15:34.171505+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:11:22Z\",\n    \"avg_ns\": 25946500206,\n    \"stddev_ns\": 335753,\n    \"avg_ts\": 19.732912,\n    \"stddev_ts\": 0.000255,\n    \"samples_ns\": [ 25946798331, 25946565787, 25946136500 ],\n    \"samples_ts\": [ 19.7327, 19.7329, 19.7332 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:13:06Z\",\n    \"avg_ns\": 49190482670,\n    \"stddev_ns\": 3281533,\n    \"avg_ts\": 10.408518,\n    \"stddev_ts\": 0.000693,\n    \"samples_ns\": [ 49194204390, 49188046595, 49189197026 ],\n    \"samples_ts\": [ 10.4077, 10.409, 10.4088 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:11:22Z",
+          "avg_ns": 25946500206,
+          "stddev_ns": 335753,
+          "avg_ts": 19.732912,
+          "stddev_ts": 0.000255,
+          "samples_ns": [
+            25946798331,
+            25946565787,
+            25946136500
+          ],
+          "samples_ts": [
+            19.7327,
+            19.7329,
+            19.7332
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:13:06Z",
+          "avg_ns": 49190482670,
+          "stddev_ns": 3281533,
+          "avg_ts": 10.408518,
+          "stddev_ts": 0.000693,
+          "samples_ns": [
+            49194204390,
+            49188046595,
+            49189197026
+          ],
+          "samples_ts": [
+            10.4077,
+            10.409,
+            10.4088
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 731
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:16:37.168737+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:15:35Z\",\n    \"avg_ns\": 6326101583,\n    \"stddev_ns\": 192700,\n    \"avg_ts\": 20.233630,\n    \"stddev_ts\": 0.000616,\n    \"samples_ns\": [ 6325884950, 6326165898, 6326253901 ],\n    \"samples_ts\": [ 20.2343, 20.2334, 20.2331 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:16:00Z\",\n    \"avg_ns\": 12180722366,\n    \"stddev_ns\": 1511458,\n    \"avg_ts\": 10.508408,\n    \"stddev_ts\": 0.001300,\n    \"samples_ns\": [ 12180055251, 12182448240, 12179663608 ],\n    \"samples_ts\": [ 10.509, 10.5069, 10.5093 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:15:35Z",
+          "avg_ns": 6326101583,
+          "stddev_ns": 192700,
+          "avg_ts": 20.23363,
+          "stddev_ts": 0.000616,
+          "samples_ns": [
+            6325884950,
+            6326165898,
+            6326253901
+          ],
+          "samples_ts": [
+            20.2343,
+            20.2334,
+            20.2331
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:16:00Z",
+          "avg_ns": 12180722366,
+          "stddev_ns": 1511458,
+          "avg_ts": 10.508408,
+          "stddev_ts": 0.0013,
+          "samples_ns": [
+            12180055251,
+            12182448240,
+            12179663608
+          ],
+          "samples_ts": [
+            10.509,
+            10.5069,
+            10.5093
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 732
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:19:31.386251+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:16:38Z\",\n    \"avg_ns\": 6326929423,\n    \"stddev_ns\": 615500,\n    \"avg_ts\": 20.230983,\n    \"stddev_ts\": 0.001935,\n    \"samples_ns\": [ 6326743081, 6326439376, 6327605814 ],\n    \"samples_ts\": [ 20.2316, 20.2325, 20.2288 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:17:03Z\",\n    \"avg_ns\": 49251587235,\n    \"stddev_ns\": 1446171,\n    \"avg_ts\": 10.395604,\n    \"stddev_ts\": 0.000298,\n    \"samples_ns\": [ 49250203097, 49251533632, 49253024978 ],\n    \"samples_ts\": [ 10.3959, 10.3956, 10.3953 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:16:38Z",
+          "avg_ns": 6326929423,
+          "stddev_ns": 615500,
+          "avg_ts": 20.230983,
+          "stddev_ts": 0.001935,
+          "samples_ns": [
+            6326743081,
+            6326439376,
+            6327605814
+          ],
+          "samples_ts": [
+            20.2316,
+            20.2325,
+            20.2288
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:17:03Z",
+          "avg_ns": 49251587235,
+          "stddev_ns": 1446171,
+          "avg_ts": 10.395604,
+          "stddev_ts": 0.000298,
+          "samples_ns": [
+            49250203097,
+            49251533632,
+            49253024978
+          ],
+          "samples_ts": [
+            10.3959,
+            10.3956,
+            10.3953
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 733
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:21:51.397534+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:19:32Z\",\n    \"avg_ns\": 25553135188,\n    \"stddev_ns\": 337319,\n    \"avg_ts\": 20.036680,\n    \"stddev_ts\": 0.000233,\n    \"samples_ns\": [ 25553131428, 25553434088, 25552840049 ],\n    \"samples_ts\": [ 20.0367, 20.0364, 20.0369 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:21:14Z\",\n    \"avg_ns\": 12214906238,\n    \"stddev_ns\": 1718601,\n    \"avg_ts\": 10.479000,\n    \"stddev_ts\": 0.001468,\n    \"samples_ns\": [ 12216875539, 12214064956, 12213778221 ],\n    \"samples_ts\": [ 10.4773, 10.4797, 10.48 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:19:32Z",
+          "avg_ns": 25553135188,
+          "stddev_ns": 337319,
+          "avg_ts": 20.03668,
+          "stddev_ts": 0.000233,
+          "samples_ns": [
+            25553131428,
+            25553434088,
+            25552840049
+          ],
+          "samples_ts": [
+            20.0367,
+            20.0364,
+            20.0369
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:21:14Z",
+          "avg_ns": 12214906238,
+          "stddev_ns": 1718601,
+          "avg_ts": 10.479,
+          "stddev_ts": 0.001468,
+          "samples_ns": [
+            12216875539,
+            12214064956,
+            12213778221
+          ],
+          "samples_ts": [
+            10.4773,
+            10.4797,
+            10.48
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 734
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:26:02.689409+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:21:52Z\",\n    \"avg_ns\": 25542318121,\n    \"stddev_ns\": 639979,\n    \"avg_ts\": 20.045166,\n    \"stddev_ts\": 0.000486,\n    \"samples_ns\": [ 25542925493, 25542342089, 25541686782 ],\n    \"samples_ts\": [ 20.0447, 20.0451, 20.0457 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:23:34Z\",\n    \"avg_ns\": 49322107534,\n    \"stddev_ns\": 1509477,\n    \"avg_ts\": 10.380741,\n    \"stddev_ts\": 0.000311,\n    \"samples_ns\": [ 49323776626, 49320972192, 49321573786 ],\n    \"samples_ts\": [ 10.3804, 10.381, 10.3809 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:21:52Z",
+          "avg_ns": 25542318121,
+          "stddev_ns": 639979,
+          "avg_ts": 20.045166,
+          "stddev_ts": 0.000486,
+          "samples_ns": [
+            25542925493,
+            25542342089,
+            25541686782
+          ],
+          "samples_ts": [
+            20.0447,
+            20.0451,
+            20.0457
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:23:34Z",
+          "avg_ns": 49322107534,
+          "stddev_ns": 1509477,
+          "avg_ts": 10.380741,
+          "stddev_ts": 0.000311,
+          "samples_ns": [
+            49323776626,
+            49320972192,
+            49321573786
+          ],
+          "samples_ts": [
+            10.3804,
+            10.381,
+            10.3809
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 735
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:27:05.885381+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:26:03Z\",\n    \"avg_ns\": 6327228557,\n    \"stddev_ns\": 183735,\n    \"avg_ts\": 20.230026,\n    \"stddev_ts\": 0.000530,\n    \"samples_ns\": [ 6327183950, 6327411917, 6327089805 ],\n    \"samples_ts\": [ 20.2302, 20.2294, 20.2305 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:26:28Z\",\n    \"avg_ns\": 12243926320,\n    \"stddev_ns\": 924521,\n    \"avg_ts\": 10.454163,\n    \"stddev_ts\": 0.000789,\n    \"samples_ns\": [ 12244629591, 12244270240, 12242879129 ],\n    \"samples_ts\": [ 10.4536, 10.4539, 10.4551 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:26:03Z",
+          "avg_ns": 6327228557,
+          "stddev_ns": 183735,
+          "avg_ts": 20.230026,
+          "stddev_ts": 0.00053,
+          "samples_ns": [
+            6327183950,
+            6327411917,
+            6327089805
+          ],
+          "samples_ts": [
+            20.2302,
+            20.2294,
+            20.2305
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:26:28Z",
+          "avg_ns": 12243926320,
+          "stddev_ns": 924521,
+          "avg_ts": 10.454163,
+          "stddev_ts": 0.000789,
+          "samples_ns": [
+            12244629591,
+            12244270240,
+            12242879129
+          ],
+          "samples_ts": [
+            10.4536,
+            10.4539,
+            10.4551
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 736
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:29:59.955685+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:27:06Z\",\n    \"avg_ns\": 6327314389,\n    \"stddev_ns\": 208353,\n    \"avg_ts\": 20.229752,\n    \"stddev_ts\": 0.000666,\n    \"samples_ns\": [ 6327160531, 6327551496, 6327231140 ],\n    \"samples_ts\": [ 20.2302, 20.229, 20.23 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:27:32Z\",\n    \"avg_ns\": 49202243663,\n    \"stddev_ns\": 1149195,\n    \"avg_ts\": 10.406030,\n    \"stddev_ts\": 0.000234,\n    \"samples_ns\": [ 49203092655, 49200993544, 49202644792 ],\n    \"samples_ts\": [ 10.4058, 10.4063, 10.4059 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:27:06Z",
+          "avg_ns": 6327314389,
+          "stddev_ns": 208353,
+          "avg_ts": 20.229752,
+          "stddev_ts": 0.000666,
+          "samples_ns": [
+            6327160531,
+            6327551496,
+            6327231140
+          ],
+          "samples_ts": [
+            20.2302,
+            20.229,
+            20.23
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:27:32Z",
+          "avg_ns": 49202243663,
+          "stddev_ns": 1149195,
+          "avg_ts": 10.40603,
+          "stddev_ts": 0.000234,
+          "samples_ns": [
+            49203092655,
+            49200993544,
+            49202644792
+          ],
+          "samples_ts": [
+            10.4058,
+            10.4063,
+            10.4059
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 737
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:32:19.873914+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:30:00Z\",\n    \"avg_ns\": 25582357121,\n    \"stddev_ns\": 1165875,\n    \"avg_ts\": 20.013793,\n    \"stddev_ts\": 0.000895,\n    \"samples_ns\": [ 25583661823, 25581882022, 25581527520 ],\n    \"samples_ts\": [ 20.0128, 20.0142, 20.0144 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:31:43Z\",\n    \"avg_ns\": 12136425495,\n    \"stddev_ns\": 3544996,\n    \"avg_ts\": 10.546763,\n    \"stddev_ts\": 0.003081,\n    \"samples_ns\": [ 12139636055, 12137019357, 12132621073 ],\n    \"samples_ts\": [ 10.544, 10.5462, 10.5501 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:30:00Z",
+          "avg_ns": 25582357121,
+          "stddev_ns": 1165875,
+          "avg_ts": 20.013793,
+          "stddev_ts": 0.000895,
+          "samples_ns": [
+            25583661823,
+            25581882022,
+            25581527520
+          ],
+          "samples_ts": [
+            20.0128,
+            20.0142,
+            20.0144
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:31:43Z",
+          "avg_ns": 12136425495,
+          "stddev_ns": 3544996,
+          "avg_ts": 10.546763,
+          "stddev_ts": 0.003081,
+          "samples_ns": [
+            12139636055,
+            12137019357,
+            12132621073
+          ],
+          "samples_ts": [
+            10.544,
+            10.5462,
+            10.5501
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 738
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:36:31.649599+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:32:20Z\",\n    \"avg_ns\": 25582014244,\n    \"stddev_ns\": 6859012,\n    \"avg_ts\": 20.014062,\n    \"stddev_ts\": 0.005365,\n    \"samples_ns\": [ 25589917849, 25578504963, 25577619920 ],\n    \"samples_ts\": [ 20.0079, 20.0168, 20.0175 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:34:03Z\",\n    \"avg_ns\": 49333262577,\n    \"stddev_ns\": 757244,\n    \"avg_ts\": 10.378393,\n    \"stddev_ts\": 0.000159,\n    \"samples_ns\": [ 49333129002, 49334077721, 49332581008 ],\n    \"samples_ts\": [ 10.3784, 10.3782, 10.3785 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:32:20Z",
+          "avg_ns": 25582014244,
+          "stddev_ns": 6859012,
+          "avg_ts": 20.014062,
+          "stddev_ts": 0.005365,
+          "samples_ns": [
+            25589917849,
+            25578504963,
+            25577619920
+          ],
+          "samples_ts": [
+            20.0079,
+            20.0168,
+            20.0175
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:34:03Z",
+          "avg_ns": 49333262577,
+          "stddev_ns": 757244,
+          "avg_ts": 10.378393,
+          "stddev_ts": 0.000159,
+          "samples_ns": [
+            49333129002,
+            49334077721,
+            49332581008
+          ],
+          "samples_ts": [
+            10.3784,
+            10.3782,
+            10.3785
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 739
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:37:34.782969+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:36:32Z\",\n    \"avg_ns\": 6332009653,\n    \"stddev_ns\": 5247323,\n    \"avg_ts\": 20.214760,\n    \"stddev_ts\": 0.016742,\n    \"samples_ns\": [ 6338066945, 6329080907, 6328881108 ],\n    \"samples_ts\": [ 20.1954, 20.2241, 20.2247 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:36:57Z\",\n    \"avg_ns\": 12214097262,\n    \"stddev_ns\": 923312,\n    \"avg_ts\": 10.479694,\n    \"stddev_ts\": 0.000786,\n    \"samples_ns\": [ 12215116782, 12213833931, 12213341074 ],\n    \"samples_ts\": [ 10.4788, 10.4799, 10.4803 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:36:32Z",
+          "avg_ns": 6332009653,
+          "stddev_ns": 5247323,
+          "avg_ts": 20.21476,
+          "stddev_ts": 0.016742,
+          "samples_ns": [
+            6338066945,
+            6329080907,
+            6328881108
+          ],
+          "samples_ts": [
+            20.1954,
+            20.2241,
+            20.2247
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:36:57Z",
+          "avg_ns": 12214097262,
+          "stddev_ns": 923312,
+          "avg_ts": 10.479694,
+          "stddev_ts": 0.000786,
+          "samples_ns": [
+            12215116782,
+            12213833931,
+            12213341074
+          ],
+          "samples_ts": [
+            10.4788,
+            10.4799,
+            10.4803
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 740
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:40:29.567240+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:37:35Z\",\n    \"avg_ns\": 6327375934,\n    \"stddev_ns\": 1654180,\n    \"avg_ts\": 20.229556,\n    \"stddev_ts\": 0.005282,\n    \"samples_ns\": [ 6329263240, 6326190323, 6326674240 ],\n    \"samples_ts\": [ 20.2235, 20.2333, 20.2318 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:38:01Z\",\n    \"avg_ns\": 49413854190,\n    \"stddev_ns\": 2281330,\n    \"avg_ts\": 10.361467,\n    \"stddev_ts\": 0.000476,\n    \"samples_ns\": [ 49414215267, 49415922490, 49411424814 ],\n    \"samples_ts\": [ 10.3614, 10.361, 10.362 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:37:35Z",
+          "avg_ns": 6327375934,
+          "stddev_ns": 1654180,
+          "avg_ts": 20.229556,
+          "stddev_ts": 0.005282,
+          "samples_ns": [
+            6329263240,
+            6326190323,
+            6326674240
+          ],
+          "samples_ts": [
+            20.2235,
+            20.2333,
+            20.2318
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:38:01Z",
+          "avg_ns": 49413854190,
+          "stddev_ns": 2281330,
+          "avg_ts": 10.361467,
+          "stddev_ts": 0.000476,
+          "samples_ns": [
+            49414215267,
+            49415922490,
+            49411424814
+          ],
+          "samples_ts": [
+            10.3614,
+            10.361,
+            10.362
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 741
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:42:51.246054+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:40:30Z\",\n    \"avg_ns\": 25952712266,\n    \"stddev_ns\": 1222612,\n    \"avg_ts\": 19.728189,\n    \"stddev_ts\": 0.000929,\n    \"samples_ns\": [ 25954071216, 25952364034, 25951701548 ],\n    \"samples_ts\": [ 19.7272, 19.7285, 19.729 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:42:14Z\",\n    \"avg_ns\": 12229105127,\n    \"stddev_ns\": 4694391,\n    \"avg_ts\": 10.466834,\n    \"stddev_ts\": 0.004015,\n    \"samples_ns\": [ 12226099786, 12226704109, 12234511488 ],\n    \"samples_ts\": [ 10.4694, 10.4689, 10.4622 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:40:30Z",
+          "avg_ns": 25952712266,
+          "stddev_ns": 1222612,
+          "avg_ts": 19.728189,
+          "stddev_ts": 0.000929,
+          "samples_ns": [
+            25954071216,
+            25952364034,
+            25951701548
+          ],
+          "samples_ts": [
+            19.7272,
+            19.7285,
+            19.729
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:42:14Z",
+          "avg_ns": 12229105127,
+          "stddev_ns": 4694391,
+          "avg_ts": 10.466834,
+          "stddev_ts": 0.004015,
+          "samples_ns": [
+            12226099786,
+            12226704109,
+            12234511488
+          ],
+          "samples_ts": [
+            10.4694,
+            10.4689,
+            10.4622
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 742
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:47:03.908522+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:42:52Z\",\n    \"avg_ns\": 25944005653,\n    \"stddev_ns\": 2842079,\n    \"avg_ts\": 19.734809,\n    \"stddev_ts\": 0.002158,\n    \"samples_ns\": [ 25942667432, 25947264811, 25942084717 ],\n    \"samples_ts\": [ 19.7358, 19.7323, 19.7363 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:44:36Z\",\n    \"avg_ns\": 49242209277,\n    \"stddev_ns\": 3646087,\n    \"avg_ts\": 10.397584,\n    \"stddev_ts\": 0.000767,\n    \"samples_ns\": [ 49246378729, 49240521069, 49239728035 ],\n    \"samples_ts\": [ 10.3967, 10.3979, 10.3981 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:42:52Z",
+          "avg_ns": 25944005653,
+          "stddev_ns": 2842079,
+          "avg_ts": 19.734809,
+          "stddev_ts": 0.002158,
+          "samples_ns": [
+            25942667432,
+            25947264811,
+            25942084717
+          ],
+          "samples_ts": [
+            19.7358,
+            19.7323,
+            19.7363
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:44:36Z",
+          "avg_ns": 49242209277,
+          "stddev_ns": 3646087,
+          "avg_ts": 10.397584,
+          "stddev_ts": 0.000767,
+          "samples_ns": [
+            49246378729,
+            49240521069,
+            49239728035
+          ],
+          "samples_ts": [
+            10.3967,
+            10.3979,
+            10.3981
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 743
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:48:07.018263+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:47:04Z\",\n    \"avg_ns\": 6327354988,\n    \"stddev_ns\": 196890,\n    \"avg_ts\": 20.229622,\n    \"stddev_ts\": 0.000517,\n    \"samples_ns\": [ 6327181331, 6327382711, 6327500924 ],\n    \"samples_ts\": [ 20.2302, 20.2295, 20.2292 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:47:30Z\",\n    \"avg_ns\": 12212581011,\n    \"stddev_ns\": 3580353,\n    \"avg_ts\": 10.480996,\n    \"stddev_ts\": 0.003071,\n    \"samples_ns\": [ 12216158597, 12209001303, 12212583134 ],\n    \"samples_ts\": [ 10.4779, 10.4841, 10.481 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:47:04Z",
+          "avg_ns": 6327354988,
+          "stddev_ns": 196890,
+          "avg_ts": 20.229622,
+          "stddev_ts": 0.000517,
+          "samples_ns": [
+            6327181331,
+            6327382711,
+            6327500924
+          ],
+          "samples_ts": [
+            20.2302,
+            20.2295,
+            20.2292
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:47:30Z",
+          "avg_ns": 12212581011,
+          "stddev_ns": 3580353,
+          "avg_ts": 10.480996,
+          "stddev_ts": 0.003071,
+          "samples_ns": [
+            12216158597,
+            12209001303,
+            12212583134
+          ],
+          "samples_ts": [
+            10.4779,
+            10.4841,
+            10.481
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 744
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:51:01.638367+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:48:07Z\",\n    \"avg_ns\": 6326496032,\n    \"stddev_ns\": 408002,\n    \"avg_ts\": 20.232369,\n    \"stddev_ts\": 0.001254,\n    \"samples_ns\": [ 6326306183, 6326234894, 6326947021 ],\n    \"samples_ts\": [ 20.233, 20.2332, 20.2309 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:48:33Z\",\n    \"avg_ns\": 49384060030,\n    \"stddev_ns\": 2585437,\n    \"avg_ts\": 10.367718,\n    \"stddev_ts\": 0.000543,\n    \"samples_ns\": [ 49383559838, 49386859017, 49381761235 ],\n    \"samples_ts\": [ 10.3678, 10.3671, 10.3682 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:48:07Z",
+          "avg_ns": 6326496032,
+          "stddev_ns": 408002,
+          "avg_ts": 20.232369,
+          "stddev_ts": 0.001254,
+          "samples_ns": [
+            6326306183,
+            6326234894,
+            6326947021
+          ],
+          "samples_ts": [
+            20.233,
+            20.2332,
+            20.2309
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:48:33Z",
+          "avg_ns": 49384060030,
+          "stddev_ns": 2585437,
+          "avg_ts": 10.367718,
+          "stddev_ts": 0.000543,
+          "samples_ns": [
+            49383559838,
+            49386859017,
+            49381761235
+          ],
+          "samples_ts": [
+            10.3678,
+            10.3671,
+            10.3682
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 745
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:53:21.557566+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:51:02Z\",\n    \"avg_ns\": 25542268719,\n    \"stddev_ns\": 380751,\n    \"avg_ts\": 20.045205,\n    \"stddev_ts\": 0.000299,\n    \"samples_ns\": [ 25542669356, 25542225211, 25541911590 ],\n    \"samples_ts\": [ 20.0449, 20.0452, 20.0455 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:52:44Z\",\n    \"avg_ns\": 12197704081,\n    \"stddev_ns\": 17027534,\n    \"avg_ts\": 10.493792,\n    \"stddev_ts\": 0.014637,\n    \"samples_ns\": [ 12187626652, 12217363705, 12188121886 ],\n    \"samples_ts\": [ 10.5025, 10.4769, 10.502 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:51:02Z",
+          "avg_ns": 25542268719,
+          "stddev_ns": 380751,
+          "avg_ts": 20.045205,
+          "stddev_ts": 0.000299,
+          "samples_ns": [
+            25542669356,
+            25542225211,
+            25541911590
+          ],
+          "samples_ts": [
+            20.0449,
+            20.0452,
+            20.0455
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:52:44Z",
+          "avg_ns": 12197704081,
+          "stddev_ns": 17027534,
+          "avg_ts": 10.493792,
+          "stddev_ts": 0.014637,
+          "samples_ns": [
+            12187626652,
+            12217363705,
+            12188121886
+          ],
+          "samples_ts": [
+            10.5025,
+            10.4769,
+            10.502
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 746
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:57:32.875889+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:53:22Z\",\n    \"avg_ns\": 25541825277,\n    \"stddev_ns\": 816020,\n    \"avg_ts\": 20.045553,\n    \"stddev_ts\": 0.000640,\n    \"samples_ns\": [ 25542111830, 25540904630, 25542459371 ],\n    \"samples_ts\": [ 20.0453, 20.0463, 20.0451 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:55:04Z\",\n    \"avg_ns\": 49311863009,\n    \"stddev_ns\": 2915712,\n    \"avg_ts\": 10.382897,\n    \"stddev_ts\": 0.000610,\n    \"samples_ns\": [ 49315203219, 49310380000, 49310005810 ],\n    \"samples_ts\": [ 10.3822, 10.3832, 10.3833 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:53:22Z",
+          "avg_ns": 25541825277,
+          "stddev_ns": 816020,
+          "avg_ts": 20.045553,
+          "stddev_ts": 0.00064,
+          "samples_ns": [
+            25542111830,
+            25540904630,
+            25542459371
+          ],
+          "samples_ts": [
+            20.0453,
+            20.0463,
+            20.0451
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:55:04Z",
+          "avg_ns": 49311863009,
+          "stddev_ns": 2915712,
+          "avg_ts": 10.382897,
+          "stddev_ts": 0.00061,
+          "samples_ns": [
+            49315203219,
+            49310380000,
+            49310005810
+          ],
+          "samples_ts": [
+            10.3822,
+            10.3832,
+            10.3833
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 747
+    },
+    {
+      "timestamp_utc": "2025-12-09T12:58:35.868252+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:57:33Z\",\n    \"avg_ns\": 6326706819,\n    \"stddev_ns\": 183100,\n    \"avg_ts\": 20.231695,\n    \"stddev_ts\": 0.000462,\n    \"samples_ns\": [ 6326853254, 6326564393, 6326702812 ],\n    \"samples_ts\": [ 20.2312, 20.2322, 20.2317 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:57:59Z\",\n    \"avg_ns\": 12179088574,\n    \"stddev_ns\": 8171541,\n    \"avg_ts\": 10.509821,\n    \"stddev_ts\": 0.007049,\n    \"samples_ns\": [ 12188486028, 12175124683, 12173655011 ],\n    \"samples_ts\": [ 10.5017, 10.5132, 10.5145 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:57:33Z",
+          "avg_ns": 6326706819,
+          "stddev_ns": 183100,
+          "avg_ts": 20.231695,
+          "stddev_ts": 0.000462,
+          "samples_ns": [
+            6326853254,
+            6326564393,
+            6326702812
+          ],
+          "samples_ts": [
+            20.2312,
+            20.2322,
+            20.2317
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:57:59Z",
+          "avg_ns": 12179088574,
+          "stddev_ns": 8171541,
+          "avg_ts": 10.509821,
+          "stddev_ts": 0.007049,
+          "samples_ns": [
+            12188486028,
+            12175124683,
+            12173655011
+          ],
+          "samples_ts": [
+            10.5017,
+            10.5132,
+            10.5145
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 748
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:01:30.183197+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:58:36Z\",\n    \"avg_ns\": 6326971286,\n    \"stddev_ns\": 207064,\n    \"avg_ts\": 20.230849,\n    \"stddev_ts\": 0.000662,\n    \"samples_ns\": [ 6327008625, 6327157140, 6326748093 ],\n    \"samples_ts\": [ 20.2307, 20.2303, 20.2316 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T12:59:02Z\",\n    \"avg_ns\": 49277981835,\n    \"stddev_ns\": 7446511,\n    \"avg_ts\": 10.390036,\n    \"stddev_ts\": 0.001569,\n    \"samples_ns\": [ 49286526981, 49274507144, 49272911381 ],\n    \"samples_ts\": [ 10.3882, 10.3908, 10.3911 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:58:36Z",
+          "avg_ns": 6326971286,
+          "stddev_ns": 207064,
+          "avg_ts": 20.230849,
+          "stddev_ts": 0.000662,
+          "samples_ns": [
+            6327008625,
+            6327157140,
+            6326748093
+          ],
+          "samples_ts": [
+            20.2307,
+            20.2303,
+            20.2316
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T12:59:02Z",
+          "avg_ns": 49277981835,
+          "stddev_ns": 7446511,
+          "avg_ts": 10.390036,
+          "stddev_ts": 0.001569,
+          "samples_ns": [
+            49286526981,
+            49274507144,
+            49272911381
+          ],
+          "samples_ts": [
+            10.3882,
+            10.3908,
+            10.3911
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 749
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:03:50.068676+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:01:31Z\",\n    \"avg_ns\": 25581344597,\n    \"stddev_ns\": 335188,\n    \"avg_ts\": 20.014585,\n    \"stddev_ts\": 0.000230,\n    \"samples_ns\": [ 25581652441, 25581315952, 25581065399 ],\n    \"samples_ts\": [ 20.0143, 20.0146, 20.0148 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:03:13Z\",\n    \"avg_ns\": 12133439733,\n    \"stddev_ns\": 2134813,\n    \"avg_ts\": 10.549358,\n    \"stddev_ts\": 0.001853,\n    \"samples_ns\": [ 12132590442, 12135865460, 12131863298 ],\n    \"samples_ts\": [ 10.5501, 10.5472, 10.5507 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:01:31Z",
+          "avg_ns": 25581344597,
+          "stddev_ns": 335188,
+          "avg_ts": 20.014585,
+          "stddev_ts": 0.00023,
+          "samples_ns": [
+            25581652441,
+            25581315952,
+            25581065399
+          ],
+          "samples_ts": [
+            20.0143,
+            20.0146,
+            20.0148
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:03:13Z",
+          "avg_ns": 12133439733,
+          "stddev_ns": 2134813,
+          "avg_ts": 10.549358,
+          "stddev_ts": 0.001853,
+          "samples_ns": [
+            12132590442,
+            12135865460,
+            12131863298
+          ],
+          "samples_ts": [
+            10.5501,
+            10.5472,
+            10.5507
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 750
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:08:01.182315+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:03:51Z\",\n    \"avg_ns\": 25573447402,\n    \"stddev_ns\": 469420,\n    \"avg_ts\": 20.020766,\n    \"stddev_ts\": 0.000322,\n    \"samples_ns\": [ 25573874725, 25573413339, 25573054144 ],\n    \"samples_ts\": [ 20.0204, 20.0208, 20.0211 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:05:33Z\",\n    \"avg_ns\": 49221326704,\n    \"stddev_ns\": 3630689,\n    \"avg_ts\": 10.401995,\n    \"stddev_ts\": 0.000766,\n    \"samples_ns\": [ 49222247316, 49217331283, 49224401514 ],\n    \"samples_ts\": [ 10.4018, 10.4028, 10.4013 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:03:51Z",
+          "avg_ns": 25573447402,
+          "stddev_ns": 469420,
+          "avg_ts": 20.020766,
+          "stddev_ts": 0.000322,
+          "samples_ns": [
+            25573874725,
+            25573413339,
+            25573054144
+          ],
+          "samples_ts": [
+            20.0204,
+            20.0208,
+            20.0211
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:05:33Z",
+          "avg_ns": 49221326704,
+          "stddev_ns": 3630689,
+          "avg_ts": 10.401995,
+          "stddev_ts": 0.000766,
+          "samples_ns": [
+            49222247316,
+            49217331283,
+            49224401514
+          ],
+          "samples_ts": [
+            10.4018,
+            10.4028,
+            10.4013
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 751
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:09:04.221687+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:08:02Z\",\n    \"avg_ns\": 6329967918,\n    \"stddev_ns\": 3424756,\n    \"avg_ts\": 20.221275,\n    \"stddev_ts\": 0.010931,\n    \"samples_ns\": [ 6328055082, 6327928993, 6333919681 ],\n    \"samples_ts\": [ 20.2274, 20.2278, 20.2087 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:08:27Z\",\n    \"avg_ns\": 12189816928,\n    \"stddev_ns\": 338385,\n    \"avg_ts\": 10.500568,\n    \"stddev_ts\": 0.000291,\n    \"samples_ns\": [ 12190032717, 12189426932, 12189991135 ],\n    \"samples_ts\": [ 10.5004, 10.5009, 10.5004 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:08:02Z",
+          "avg_ns": 6329967918,
+          "stddev_ns": 3424756,
+          "avg_ts": 20.221275,
+          "stddev_ts": 0.010931,
+          "samples_ns": [
+            6328055082,
+            6327928993,
+            6333919681
+          ],
+          "samples_ts": [
+            20.2274,
+            20.2278,
+            20.2087
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:08:27Z",
+          "avg_ns": 12189816928,
+          "stddev_ns": 338385,
+          "avg_ts": 10.500568,
+          "stddev_ts": 0.000291,
+          "samples_ns": [
+            12190032717,
+            12189426932,
+            12189991135
+          ],
+          "samples_ts": [
+            10.5004,
+            10.5009,
+            10.5004
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 752
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:11:58.550667+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:09:05Z\",\n    \"avg_ns\": 6330021280,\n    \"stddev_ns\": 5343894,\n    \"avg_ts\": 20.221111,\n    \"stddev_ts\": 0.017063,\n    \"samples_ns\": [ 6327108242, 6326766868, 6336188730 ],\n    \"samples_ts\": [ 20.2304, 20.2315, 20.2014 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:09:30Z\",\n    \"avg_ns\": 49287579501,\n    \"stddev_ns\": 2725819,\n    \"avg_ts\": 10.388013,\n    \"stddev_ts\": 0.000573,\n    \"samples_ns\": [ 49289637384, 49288601083, 49284500037 ],\n    \"samples_ts\": [ 10.3876, 10.3878, 10.3887 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:09:05Z",
+          "avg_ns": 6330021280,
+          "stddev_ns": 5343894,
+          "avg_ts": 20.221111,
+          "stddev_ts": 0.017063,
+          "samples_ns": [
+            6327108242,
+            6326766868,
+            6336188730
+          ],
+          "samples_ts": [
+            20.2304,
+            20.2315,
+            20.2014
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:09:30Z",
+          "avg_ns": 49287579501,
+          "stddev_ns": 2725819,
+          "avg_ts": 10.388013,
+          "stddev_ts": 0.000573,
+          "samples_ns": [
+            49289637384,
+            49288601083,
+            49284500037
+          ],
+          "samples_ts": [
+            10.3876,
+            10.3878,
+            10.3887
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 753
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:14:20.220866+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:11:59Z\",\n    \"avg_ns\": 25951734015,\n    \"stddev_ns\": 528042,\n    \"avg_ts\": 19.728932,\n    \"stddev_ts\": 0.000382,\n    \"samples_ns\": [ 25951753030, 25952227107, 25951221909 ],\n    \"samples_ts\": [ 19.7289, 19.7286, 19.7293 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:13:43Z\",\n    \"avg_ns\": 12227357224,\n    \"stddev_ns\": 1386971,\n    \"avg_ts\": 10.468329,\n    \"stddev_ts\": 0.001184,\n    \"samples_ns\": [ 12228826863, 12227162390, 12226082420 ],\n    \"samples_ts\": [ 10.4671, 10.4685, 10.4694 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:11:59Z",
+          "avg_ns": 25951734015,
+          "stddev_ns": 528042,
+          "avg_ts": 19.728932,
+          "stddev_ts": 0.000382,
+          "samples_ns": [
+            25951753030,
+            25952227107,
+            25951221909
+          ],
+          "samples_ts": [
+            19.7289,
+            19.7286,
+            19.7293
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:13:43Z",
+          "avg_ns": 12227357224,
+          "stddev_ns": 1386971,
+          "avg_ts": 10.468329,
+          "stddev_ts": 0.001184,
+          "samples_ns": [
+            12228826863,
+            12227162390,
+            12226082420
+          ],
+          "samples_ts": [
+            10.4671,
+            10.4685,
+            10.4694
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 754
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:18:33.356537+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:14:21Z\",\n    \"avg_ns\": 25994028362,\n    \"stddev_ns\": 77548380,\n    \"avg_ts\": 19.696948,\n    \"stddev_ts\": 0.058661,\n    \"samples_ns\": [ 26083557776, 25950699783, 25947827529 ],\n    \"samples_ts\": [ 19.6292, 19.7297, 19.7319 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:16:05Z\",\n    \"avg_ns\": 49139985255,\n    \"stddev_ns\": 2429555,\n    \"avg_ts\": 10.419214,\n    \"stddev_ts\": 0.000511,\n    \"samples_ns\": [ 49141172335, 49141570613, 49137212819 ],\n    \"samples_ts\": [ 10.419, 10.4189, 10.4198 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:14:21Z",
+          "avg_ns": 25994028362,
+          "stddev_ns": 77548380,
+          "avg_ts": 19.696948,
+          "stddev_ts": 0.058661,
+          "samples_ns": [
+            26083557776,
+            25950699783,
+            25947827529
+          ],
+          "samples_ts": [
+            19.6292,
+            19.7297,
+            19.7319
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:16:05Z",
+          "avg_ns": 49139985255,
+          "stddev_ns": 2429555,
+          "avg_ts": 10.419214,
+          "stddev_ts": 0.000511,
+          "samples_ns": [
+            49141172335,
+            49141570613,
+            49137212819
+          ],
+          "samples_ts": [
+            10.419,
+            10.4189,
+            10.4198
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 755
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:19:12.308207+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:18:34Z\",\n    \"avg_ns\": 3204916757,\n    \"stddev_ns\": 2125152,\n    \"avg_ts\": 39.938647,\n    \"stddev_ts\": 0.026463,\n    \"samples_ns\": [ 3203632545, 3203748845, 3207368882 ],\n    \"samples_ts\": [ 39.9546, 39.9532, 39.9081 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:18:47Z\",\n    \"avg_ns\": 8336656310,\n    \"stddev_ns\": 762384,\n    \"avg_ts\": 15.353878,\n    \"stddev_ts\": 0.001404,\n    \"samples_ns\": [ 8336279459, 8336155738, 8337533733 ],\n    \"samples_ts\": [ 15.3546, 15.3548, 15.3523 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:18:34Z",
+          "avg_ns": 3204916757,
+          "stddev_ns": 2125152,
+          "avg_ts": 39.938647,
+          "stddev_ts": 0.026463,
+          "samples_ns": [
+            3203632545,
+            3203748845,
+            3207368882
+          ],
+          "samples_ts": [
+            39.9546,
+            39.9532,
+            39.9081
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:18:47Z",
+          "avg_ns": 8336656310,
+          "stddev_ns": 762384,
+          "avg_ts": 15.353878,
+          "stddev_ts": 0.001404,
+          "samples_ns": [
+            8336279459,
+            8336155738,
+            8337533733
+          ],
+          "samples_ts": [
+            15.3546,
+            15.3548,
+            15.3523
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 756
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:21:07.300403+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:19:13Z\",\n    \"avg_ns\": 3206116187,\n    \"stddev_ns\": 3883829,\n    \"avg_ts\": 39.923733,\n    \"stddev_ts\": 0.048319,\n    \"samples_ns\": [ 3210597035, 3203737104, 3204014424 ],\n    \"samples_ts\": [ 39.868, 39.9533, 39.9499 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:19:26Z\",\n    \"avg_ns\": 33667636362,\n    \"stddev_ns\": 7901595,\n    \"avg_ts\": 15.207483,\n    \"stddev_ts\": 0.003568,\n    \"samples_ns\": [ 33661866390, 33676639527, 33664403170 ],\n    \"samples_ts\": [ 15.2101, 15.2034, 15.2089 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:19:13Z",
+          "avg_ns": 3206116187,
+          "stddev_ns": 3883829,
+          "avg_ts": 39.923733,
+          "stddev_ts": 0.048319,
+          "samples_ns": [
+            3210597035,
+            3203737104,
+            3204014424
+          ],
+          "samples_ts": [
+            39.868,
+            39.9533,
+            39.9499
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:19:26Z",
+          "avg_ns": 33667636362,
+          "stddev_ns": 7901595,
+          "avg_ts": 15.207483,
+          "stddev_ts": 0.003568,
+          "samples_ns": [
+            33661866390,
+            33676639527,
+            33664403170
+          ],
+          "samples_ts": [
+            15.2101,
+            15.2034,
+            15.2089
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 757
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:22:25.210031+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:21:08Z\",\n    \"avg_ns\": 12909439466,\n    \"stddev_ns\": 2770405,\n    \"avg_ts\": 39.660902,\n    \"stddev_ts\": 0.008496,\n    \"samples_ns\": [ 12912598306, 12907453072, 12908267022 ],\n    \"samples_ts\": [ 39.6512, 39.667, 39.6645 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:22:00Z\",\n    \"avg_ns\": 8344072278,\n    \"stddev_ns\": 3610725,\n    \"avg_ts\": 15.340233,\n    \"stddev_ts\": 0.006637,\n    \"samples_ns\": [ 8348180395, 8341401882, 8342634557 ],\n    \"samples_ts\": [ 15.3327, 15.3451, 15.3429 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:21:08Z",
+          "avg_ns": 12909439466,
+          "stddev_ns": 2770405,
+          "avg_ts": 39.660902,
+          "stddev_ts": 0.008496,
+          "samples_ns": [
+            12912598306,
+            12907453072,
+            12908267022
+          ],
+          "samples_ts": [
+            39.6512,
+            39.667,
+            39.6645
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:22:00Z",
+          "avg_ns": 8344072278,
+          "stddev_ns": 3610725,
+          "avg_ts": 15.340233,
+          "stddev_ts": 0.006637,
+          "samples_ns": [
+            8348180395,
+            8341401882,
+            8342634557
+          ],
+          "samples_ts": [
+            15.3327,
+            15.3451,
+            15.3429
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 758
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:25:00.042891+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:22:26Z\",\n    \"avg_ns\": 12978802977,\n    \"stddev_ns\": 99446918,\n    \"avg_ts\": 39.450476,\n    \"stddev_ts\": 0.301051,\n    \"samples_ns\": [ 13092641514, 12934931863, 12908835556 ],\n    \"samples_ts\": [ 39.1059, 39.5827, 39.6628 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:23:18Z\",\n    \"avg_ns\": 33889997692,\n    \"stddev_ns\": 7512406,\n    \"avg_ts\": 15.107703,\n    \"stddev_ts\": 0.003348,\n    \"samples_ns\": [ 33884325484, 33887152960, 33898514633 ],\n    \"samples_ts\": [ 15.1102, 15.109, 15.1039 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:22:26Z",
+          "avg_ns": 12978802977,
+          "stddev_ns": 99446918,
+          "avg_ts": 39.450476,
+          "stddev_ts": 0.301051,
+          "samples_ns": [
+            13092641514,
+            12934931863,
+            12908835556
+          ],
+          "samples_ts": [
+            39.1059,
+            39.5827,
+            39.6628
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:23:18Z",
+          "avg_ns": 33889997692,
+          "stddev_ns": 7512406,
+          "avg_ts": 15.107703,
+          "stddev_ts": 0.003348,
+          "samples_ns": [
+            33884325484,
+            33887152960,
+            33898514633
+          ],
+          "samples_ts": [
+            15.1102,
+            15.109,
+            15.1039
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 759
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:25:39.167302+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:25:01Z\",\n    \"avg_ns\": 3203975259,\n    \"stddev_ns\": 254247,\n    \"avg_ts\": 39.950371,\n    \"stddev_ts\": 0.003009,\n    \"samples_ns\": [ 3204250131, 3203798221, 3203877427 ],\n    \"samples_ts\": [ 39.9469, 39.9526, 39.9516 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:25:13Z\",\n    \"avg_ns\": 8394300693,\n    \"stddev_ns\": 5424963,\n    \"avg_ts\": 15.248445,\n    \"stddev_ts\": 0.009848,\n    \"samples_ns\": [ 8400541532, 8390730395, 8391630154 ],\n    \"samples_ts\": [ 15.2371, 15.2549, 15.2533 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:25:01Z",
+          "avg_ns": 3203975259,
+          "stddev_ns": 254247,
+          "avg_ts": 39.950371,
+          "stddev_ts": 0.003009,
+          "samples_ns": [
+            3204250131,
+            3203798221,
+            3203877427
+          ],
+          "samples_ts": [
+            39.9469,
+            39.9526,
+            39.9516
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:25:13Z",
+          "avg_ns": 8394300693,
+          "stddev_ns": 5424963,
+          "avg_ts": 15.248445,
+          "stddev_ts": 0.009848,
+          "samples_ns": [
+            8400541532,
+            8390730395,
+            8391630154
+          ],
+          "samples_ts": [
+            15.2371,
+            15.2549,
+            15.2533
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 760
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:27:34.020836+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:25:40Z\",\n    \"avg_ns\": 3203439159,\n    \"stddev_ns\": 937563,\n    \"avg_ts\": 39.957059,\n    \"stddev_ts\": 0.011650,\n    \"samples_ns\": [ 3202725448, 3203095601, 3204496430 ],\n    \"samples_ts\": [ 39.966, 39.9613, 39.9439 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:25:52Z\",\n    \"avg_ns\": 33628625587,\n    \"stddev_ns\": 50556956,\n    \"avg_ts\": 15.225147,\n    \"stddev_ts\": 0.022869,\n    \"samples_ns\": [ 33686972956, 33601072713, 33597831094 ],\n    \"samples_ts\": [ 15.1988, 15.2376, 15.2391 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:25:40Z",
+          "avg_ns": 3203439159,
+          "stddev_ns": 937563,
+          "avg_ts": 39.957059,
+          "stddev_ts": 0.01165,
+          "samples_ns": [
+            3202725448,
+            3203095601,
+            3204496430
+          ],
+          "samples_ts": [
+            39.966,
+            39.9613,
+            39.9439
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:25:52Z",
+          "avg_ns": 33628625587,
+          "stddev_ns": 50556956,
+          "avg_ts": 15.225147,
+          "stddev_ts": 0.022869,
+          "samples_ns": [
+            33686972956,
+            33601072713,
+            33597831094
+          ],
+          "samples_ts": [
+            15.1988,
+            15.2376,
+            15.2391
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 761
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:28:51.939493+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:27:35Z\",\n    \"avg_ns\": 12945774651,\n    \"stddev_ns\": 12817736,\n    \"avg_ts\": 39.549610,\n    \"stddev_ts\": 0.039134,\n    \"samples_ns\": [ 12939402236, 12960528604, 12937393115 ],\n    \"samples_ts\": [ 39.5691, 39.5046, 39.5752 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:28:26Z\",\n    \"avg_ns\": 8334541529,\n    \"stddev_ns\": 2172631,\n    \"avg_ts\": 15.357774,\n    \"stddev_ts\": 0.003996,\n    \"samples_ns\": [ 8332869224, 8336992057, 8333763308 ],\n    \"samples_ts\": [ 15.3609, 15.3533, 15.3592 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:27:35Z",
+          "avg_ns": 12945774651,
+          "stddev_ns": 12817736,
+          "avg_ts": 39.54961,
+          "stddev_ts": 0.039134,
+          "samples_ns": [
+            12939402236,
+            12960528604,
+            12937393115
+          ],
+          "samples_ts": [
+            39.5691,
+            39.5046,
+            39.5752
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:28:26Z",
+          "avg_ns": 8334541529,
+          "stddev_ns": 2172631,
+          "avg_ts": 15.357774,
+          "stddev_ts": 0.003996,
+          "samples_ns": [
+            8332869224,
+            8336992057,
+            8333763308
+          ],
+          "samples_ts": [
+            15.3609,
+            15.3533,
+            15.3592
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 762
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:31:25.744062+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:28:52Z\",\n    \"avg_ns\": 12937467763,\n    \"stddev_ns\": 4227011,\n    \"avg_ts\": 39.574981,\n    \"stddev_ts\": 0.012923,\n    \"samples_ns\": [ 12942309752, 12935567392, 12934526146 ],\n    \"samples_ts\": [ 39.5602, 39.5808, 39.584 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:29:44Z\",\n    \"avg_ns\": 33642343593,\n    \"stddev_ns\": 4123929,\n    \"avg_ts\": 15.218916,\n    \"stddev_ts\": 0.001862,\n    \"samples_ns\": [ 33641702541, 33638585971, 33646742269 ],\n    \"samples_ts\": [ 15.2192, 15.2206, 15.2169 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:28:52Z",
+          "avg_ns": 12937467763,
+          "stddev_ns": 4227011,
+          "avg_ts": 39.574981,
+          "stddev_ts": 0.012923,
+          "samples_ns": [
+            12942309752,
+            12935567392,
+            12934526146
+          ],
+          "samples_ts": [
+            39.5602,
+            39.5808,
+            39.584
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:29:44Z",
+          "avg_ns": 33642343593,
+          "stddev_ns": 4123929,
+          "avg_ts": 15.218916,
+          "stddev_ts": 0.001862,
+          "samples_ns": [
+            33641702541,
+            33638585971,
+            33646742269
+          ],
+          "samples_ts": [
+            15.2192,
+            15.2206,
+            15.2169
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 763
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:32:04.628682+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:31:26Z\",\n    \"avg_ns\": 3202415619,\n    \"stddev_ns\": 439808,\n    \"avg_ts\": 39.969828,\n    \"stddev_ts\": 0.005397,\n    \"samples_ns\": [ 3202913318, 3202202106, 3202131435 ],\n    \"samples_ts\": [ 39.9636, 39.9725, 39.9734 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:31:39Z\",\n    \"avg_ns\": 8316470552,\n    \"stddev_ns\": 5766830,\n    \"avg_ts\": 15.391149,\n    \"stddev_ts\": 0.010668,\n    \"samples_ns\": [ 8315262120, 8322744378, 8311405160 ],\n    \"samples_ts\": [ 15.3934, 15.3795, 15.4005 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:31:26Z",
+          "avg_ns": 3202415619,
+          "stddev_ns": 439808,
+          "avg_ts": 39.969828,
+          "stddev_ts": 0.005397,
+          "samples_ns": [
+            3202913318,
+            3202202106,
+            3202131435
+          ],
+          "samples_ts": [
+            39.9636,
+            39.9725,
+            39.9734
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:31:39Z",
+          "avg_ns": 8316470552,
+          "stddev_ns": 5766830,
+          "avg_ts": 15.391149,
+          "stddev_ts": 0.010668,
+          "samples_ns": [
+            8315262120,
+            8322744378,
+            8311405160
+          ],
+          "samples_ts": [
+            15.3934,
+            15.3795,
+            15.4005
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 764
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:33:59.921814+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:32:05Z\",\n    \"avg_ns\": 3202577157,\n    \"stddev_ns\": 312587,\n    \"avg_ts\": 39.967812,\n    \"stddev_ts\": 0.003771,\n    \"samples_ns\": [ 3202551905, 3202891160, 3202288408 ],\n    \"samples_ts\": [ 39.9681, 39.9639, 39.9714 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:32:18Z\",\n    \"avg_ns\": 33785009801,\n    \"stddev_ns\": 112950950,\n    \"avg_ts\": 15.154763,\n    \"stddev_ts\": 0.050568,\n    \"samples_ns\": [ 33721428761, 33915420493, 33718180151 ],\n    \"samples_ts\": [ 15.1832, 15.0964, 15.1847 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:32:05Z",
+          "avg_ns": 3202577157,
+          "stddev_ns": 312587,
+          "avg_ts": 39.967812,
+          "stddev_ts": 0.003771,
+          "samples_ns": [
+            3202551905,
+            3202891160,
+            3202288408
+          ],
+          "samples_ts": [
+            39.9681,
+            39.9639,
+            39.9714
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:32:18Z",
+          "avg_ns": 33785009801,
+          "stddev_ns": 112950950,
+          "avg_ts": 15.154763,
+          "stddev_ts": 0.050568,
+          "samples_ns": [
+            33721428761,
+            33915420493,
+            33718180151
+          ],
+          "samples_ts": [
+            15.1832,
+            15.0964,
+            15.1847
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 765
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:35:18.784214+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:34:00Z\",\n    \"avg_ns\": 13141816258,\n    \"stddev_ns\": 217001,\n    \"avg_ts\": 38.959607,\n    \"stddev_ts\": 0.000643,\n    \"samples_ns\": [ 13141951334, 13141931492, 13141565948 ],\n    \"samples_ts\": [ 38.9592, 38.9593, 38.9603 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:34:53Z\",\n    \"avg_ns\": 8378079757,\n    \"stddev_ns\": 2334885,\n    \"avg_ts\": 15.277965,\n    \"stddev_ts\": 0.004258,\n    \"samples_ns\": [ 8378851033, 8379931426, 8375456812 ],\n    \"samples_ts\": [ 15.2766, 15.2746, 15.2827 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:34:00Z",
+          "avg_ns": 13141816258,
+          "stddev_ns": 217001,
+          "avg_ts": 38.959607,
+          "stddev_ts": 0.000643,
+          "samples_ns": [
+            13141951334,
+            13141931492,
+            13141565948
+          ],
+          "samples_ts": [
+            38.9592,
+            38.9593,
+            38.9603
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:34:53Z",
+          "avg_ns": 8378079757,
+          "stddev_ns": 2334885,
+          "avg_ts": 15.277965,
+          "stddev_ts": 0.004258,
+          "samples_ns": [
+            8378851033,
+            8379931426,
+            8375456812
+          ],
+          "samples_ts": [
+            15.2766,
+            15.2746,
+            15.2827
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 766
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:37:54.275945+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:35:19Z\",\n    \"avg_ns\": 13145441266,\n    \"stddev_ns\": 1179926,\n    \"avg_ts\": 38.948864,\n    \"stddev_ts\": 0.003463,\n    \"samples_ns\": [ 13146759333, 13144531314, 13145033153 ],\n    \"samples_ts\": [ 38.945, 38.9516, 38.9501 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:36:12Z\",\n    \"avg_ns\": 33925458567,\n    \"stddev_ns\": 3841578,\n    \"avg_ts\": 15.091911,\n    \"stddev_ts\": 0.001707,\n    \"samples_ns\": [ 33921246729, 33926373221, 33928755752 ],\n    \"samples_ts\": [ 15.0938, 15.0915, 15.0904 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:35:19Z",
+          "avg_ns": 13145441266,
+          "stddev_ns": 1179926,
+          "avg_ts": 38.948864,
+          "stddev_ts": 0.003463,
+          "samples_ns": [
+            13146759333,
+            13144531314,
+            13145033153
+          ],
+          "samples_ts": [
+            38.945,
+            38.9516,
+            38.9501
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:36:12Z",
+          "avg_ns": 33925458567,
+          "stddev_ns": 3841578,
+          "avg_ts": 15.091911,
+          "stddev_ts": 0.001707,
+          "samples_ns": [
+            33921246729,
+            33926373221,
+            33928755752
+          ],
+          "samples_ts": [
+            15.0938,
+            15.0915,
+            15.0904
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 767
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:38:33.420530+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:37:55Z\",\n    \"avg_ns\": 3201767600,\n    \"stddev_ns\": 191655,\n    \"avg_ts\": 39.977917,\n    \"stddev_ts\": 0.002286,\n    \"samples_ns\": [ 3201657717, 3201978985, 3201666099 ],\n    \"samples_ts\": [ 39.9793, 39.9753, 39.9792 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:38:08Z\",\n    \"avg_ns\": 8405159648,\n    \"stddev_ns\": 6679693,\n    \"avg_ts\": 15.228747,\n    \"stddev_ts\": 0.012095,\n    \"samples_ns\": [ 8400289315, 8402416845, 8412772786 ],\n    \"samples_ts\": [ 15.2376, 15.2337, 15.215 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:37:55Z",
+          "avg_ns": 3201767600,
+          "stddev_ns": 191655,
+          "avg_ts": 39.977917,
+          "stddev_ts": 0.002286,
+          "samples_ns": [
+            3201657717,
+            3201978985,
+            3201666099
+          ],
+          "samples_ts": [
+            39.9793,
+            39.9753,
+            39.9792
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:38:08Z",
+          "avg_ns": 8405159648,
+          "stddev_ns": 6679693,
+          "avg_ts": 15.228747,
+          "stddev_ts": 0.012095,
+          "samples_ns": [
+            8400289315,
+            8402416845,
+            8412772786
+          ],
+          "samples_ts": [
+            15.2376,
+            15.2337,
+            15.215
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 768
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:40:28.955996+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:38:34Z\",\n    \"avg_ns\": 3202027689,\n    \"stddev_ns\": 196681,\n    \"avg_ts\": 39.974670,\n    \"stddev_ts\": 0.002243,\n    \"samples_ns\": [ 3202030988, 3201846398, 3202205683 ],\n    \"samples_ts\": [ 39.9746, 39.9769, 39.9724 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:38:47Z\",\n    \"avg_ns\": 33851291252,\n    \"stddev_ns\": 122163940,\n    \"avg_ts\": 15.125108,\n    \"stddev_ts\": 0.054674,\n    \"samples_ns\": [ 33893554307, 33946711731, 33713607719 ],\n    \"samples_ts\": [ 15.1061, 15.0825, 15.1867 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:38:34Z",
+          "avg_ns": 3202027689,
+          "stddev_ns": 196681,
+          "avg_ts": 39.97467,
+          "stddev_ts": 0.002243,
+          "samples_ns": [
+            3202030988,
+            3201846398,
+            3202205683
+          ],
+          "samples_ts": [
+            39.9746,
+            39.9769,
+            39.9724
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:38:47Z",
+          "avg_ns": 33851291252,
+          "stddev_ns": 122163940,
+          "avg_ts": 15.125108,
+          "stddev_ts": 0.054674,
+          "samples_ns": [
+            33893554307,
+            33946711731,
+            33713607719
+          ],
+          "samples_ts": [
+            15.1061,
+            15.0825,
+            15.1867
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 769
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:41:46.888475+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:40:29Z\",\n    \"avg_ns\": 12908304023,\n    \"stddev_ns\": 688403,\n    \"avg_ts\": 39.664390,\n    \"stddev_ts\": 0.002115,\n    \"samples_ns\": [ 12908217102, 12909031759, 12907663208 ],\n    \"samples_ts\": [ 39.6647, 39.6622, 39.6664 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:41:21Z\",\n    \"avg_ns\": 8388277285,\n    \"stddev_ns\": 1886228,\n    \"avg_ts\": 15.259391,\n    \"stddev_ts\": 0.003431,\n    \"samples_ns\": [ 8386667865, 8387811097, 8390352893 ],\n    \"samples_ts\": [ 15.2623, 15.2602, 15.2556 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:40:29Z",
+          "avg_ns": 12908304023,
+          "stddev_ns": 688403,
+          "avg_ts": 39.66439,
+          "stddev_ts": 0.002115,
+          "samples_ns": [
+            12908217102,
+            12909031759,
+            12907663208
+          ],
+          "samples_ts": [
+            39.6647,
+            39.6622,
+            39.6664
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:41:21Z",
+          "avg_ns": 8388277285,
+          "stddev_ns": 1886228,
+          "avg_ts": 15.259391,
+          "stddev_ts": 0.003431,
+          "samples_ns": [
+            8386667865,
+            8387811097,
+            8390352893
+          ],
+          "samples_ts": [
+            15.2623,
+            15.2602,
+            15.2556
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 770
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:44:20.765755+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:41:47Z\",\n    \"avg_ns\": 12913534970,\n    \"stddev_ns\": 2108460,\n    \"avg_ts\": 39.648323,\n    \"stddev_ts\": 0.006473,\n    \"samples_ns\": [ 12911664999, 12915820145, 12913119766 ],\n    \"samples_ts\": [ 39.6541, 39.6413, 39.6496 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:42:39Z\",\n    \"avg_ns\": 33698147367,\n    \"stddev_ns\": 2955017,\n    \"avg_ts\": 15.193714,\n    \"stddev_ts\": 0.001332,\n    \"samples_ns\": [ 33701159397, 33695252869, 33698029835 ],\n    \"samples_ts\": [ 15.1924, 15.195, 15.1938 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:41:47Z",
+          "avg_ns": 12913534970,
+          "stddev_ns": 2108460,
+          "avg_ts": 39.648323,
+          "stddev_ts": 0.006473,
+          "samples_ns": [
+            12911664999,
+            12915820145,
+            12913119766
+          ],
+          "samples_ts": [
+            39.6541,
+            39.6413,
+            39.6496
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:42:39Z",
+          "avg_ns": 33698147367,
+          "stddev_ns": 2955017,
+          "avg_ts": 15.193714,
+          "stddev_ts": 0.001332,
+          "samples_ns": [
+            33701159397,
+            33695252869,
+            33698029835
+          ],
+          "samples_ts": [
+            15.1924,
+            15.195,
+            15.1938
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 771
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:44:59.757028+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:44:21Z\",\n    \"avg_ns\": 3204865293,\n    \"stddev_ns\": 2647978,\n    \"avg_ts\": 39.939294,\n    \"stddev_ts\": 0.032976,\n    \"samples_ns\": [ 3207921683, 3203287657, 3203386540 ],\n    \"samples_ts\": [ 39.9012, 39.9589, 39.9577 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:44:34Z\",\n    \"avg_ns\": 8348528903,\n    \"stddev_ns\": 1065755,\n    \"avg_ts\": 15.332043,\n    \"stddev_ts\": 0.001957,\n    \"samples_ns\": [ 8347305022, 8349252298, 8349029389 ],\n    \"samples_ts\": [ 15.3343, 15.3307, 15.3311 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:44:21Z",
+          "avg_ns": 3204865293,
+          "stddev_ns": 2647978,
+          "avg_ts": 39.939294,
+          "stddev_ts": 0.032976,
+          "samples_ns": [
+            3207921683,
+            3203287657,
+            3203386540
+          ],
+          "samples_ts": [
+            39.9012,
+            39.9589,
+            39.9577
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:44:34Z",
+          "avg_ns": 8348528903,
+          "stddev_ns": 1065755,
+          "avg_ts": 15.332043,
+          "stddev_ts": 0.001957,
+          "samples_ns": [
+            8347305022,
+            8349252298,
+            8349029389
+          ],
+          "samples_ts": [
+            15.3343,
+            15.3307,
+            15.3311
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 772
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:46:55.262897+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:45:00Z\",\n    \"avg_ns\": 3203822616,\n    \"stddev_ns\": 477633,\n    \"avg_ts\": 39.952275,\n    \"stddev_ts\": 0.005956,\n    \"samples_ns\": [ 3204371122, 3203598256, 3203498470 ],\n    \"samples_ts\": [ 39.9454, 39.9551, 39.9563 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:45:13Z\",\n    \"avg_ns\": 33855015564,\n    \"stddev_ns\": 2231671,\n    \"avg_ts\": 15.123313,\n    \"stddev_ts\": 0.000990,\n    \"samples_ns\": [ 33855587019, 33852569345, 33856890330 ],\n    \"samples_ts\": [ 15.1231, 15.1244, 15.1225 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:45:00Z",
+          "avg_ns": 3203822616,
+          "stddev_ns": 477633,
+          "avg_ts": 39.952275,
+          "stddev_ts": 0.005956,
+          "samples_ns": [
+            3204371122,
+            3203598256,
+            3203498470
+          ],
+          "samples_ts": [
+            39.9454,
+            39.9551,
+            39.9563
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:45:13Z",
+          "avg_ns": 33855015564,
+          "stddev_ns": 2231671,
+          "avg_ts": 15.123313,
+          "stddev_ts": 0.00099,
+          "samples_ns": [
+            33855587019,
+            33852569345,
+            33856890330
+          ],
+          "samples_ts": [
+            15.1231,
+            15.1244,
+            15.1225
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 773
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:48:13.174186+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:46:56Z\",\n    \"avg_ns\": 12935793867,\n    \"stddev_ns\": 1851972,\n    \"avg_ts\": 39.580099,\n    \"stddev_ts\": 0.005645,\n    \"samples_ns\": [ 12937922081, 12934646316, 12934813206 ],\n    \"samples_ts\": [ 39.5736, 39.5836, 39.5831 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:47:47Z\",\n    \"avg_ns\": 8342371262,\n    \"stddev_ns\": 2416197,\n    \"avg_ts\": 15.343360,\n    \"stddev_ts\": 0.004440,\n    \"samples_ns\": [ 8345156154, 8341092616, 8340865017 ],\n    \"samples_ts\": [ 15.3382, 15.3457, 15.3461 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:46:56Z",
+          "avg_ns": 12935793867,
+          "stddev_ns": 1851972,
+          "avg_ts": 39.580099,
+          "stddev_ts": 0.005645,
+          "samples_ns": [
+            12937922081,
+            12934646316,
+            12934813206
+          ],
+          "samples_ts": [
+            39.5736,
+            39.5836,
+            39.5831
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:47:47Z",
+          "avg_ns": 8342371262,
+          "stddev_ns": 2416197,
+          "avg_ts": 15.34336,
+          "stddev_ts": 0.00444,
+          "samples_ns": [
+            8345156154,
+            8341092616,
+            8340865017
+          ],
+          "samples_ts": [
+            15.3382,
+            15.3457,
+            15.3461
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 774
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:50:47.303204+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:48:14Z\",\n    \"avg_ns\": 12930581882,\n    \"stddev_ns\": 323612,\n    \"avg_ts\": 39.596053,\n    \"stddev_ts\": 0.000928,\n    \"samples_ns\": [ 12930251007, 12930648903, 12930845737 ],\n    \"samples_ts\": [ 39.5971, 39.5958, 39.5952 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:49:05Z\",\n    \"avg_ns\": 33752334050,\n    \"stddev_ns\": 4534926,\n    \"avg_ts\": 15.169321,\n    \"stddev_ts\": 0.002035,\n    \"samples_ns\": [ 33751253558, 33757304021, 33748444573 ],\n    \"samples_ts\": [ 15.1698, 15.1671, 15.1711 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:48:14Z",
+          "avg_ns": 12930581882,
+          "stddev_ns": 323612,
+          "avg_ts": 39.596053,
+          "stddev_ts": 0.000928,
+          "samples_ns": [
+            12930251007,
+            12930648903,
+            12930845737
+          ],
+          "samples_ts": [
+            39.5971,
+            39.5958,
+            39.5952
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:49:05Z",
+          "avg_ns": 33752334050,
+          "stddev_ns": 4534926,
+          "avg_ts": 15.169321,
+          "stddev_ts": 0.002035,
+          "samples_ns": [
+            33751253558,
+            33757304021,
+            33748444573
+          ],
+          "samples_ts": [
+            15.1698,
+            15.1671,
+            15.1711
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 775
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:51:26.253299+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:50:48Z\",\n    \"avg_ns\": 3205667375,\n    \"stddev_ns\": 2970998,\n    \"avg_ts\": 39.929306,\n    \"stddev_ts\": 0.036980,\n    \"samples_ns\": [ 3203830675, 3204077035, 3209094416 ],\n    \"samples_ts\": [ 39.9522, 39.9491, 39.8866 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:51:01Z\",\n    \"avg_ns\": 8329877655,\n    \"stddev_ns\": 1303285,\n    \"avg_ts\": 15.366372,\n    \"stddev_ts\": 0.002398,\n    \"samples_ns\": [ 8331298021, 8329588357, 8328746588 ],\n    \"samples_ts\": [ 15.3638, 15.3669, 15.3685 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:50:48Z",
+          "avg_ns": 3205667375,
+          "stddev_ns": 2970998,
+          "avg_ts": 39.929306,
+          "stddev_ts": 0.03698,
+          "samples_ns": [
+            3203830675,
+            3204077035,
+            3209094416
+          ],
+          "samples_ts": [
+            39.9522,
+            39.9491,
+            39.8866
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:51:01Z",
+          "avg_ns": 8329877655,
+          "stddev_ns": 1303285,
+          "avg_ts": 15.366372,
+          "stddev_ts": 0.002398,
+          "samples_ns": [
+            8331298021,
+            8329588357,
+            8328746588
+          ],
+          "samples_ts": [
+            15.3638,
+            15.3669,
+            15.3685
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 776
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:53:22.261922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:51:27Z\",\n    \"avg_ns\": 3205710592,\n    \"stddev_ns\": 4046042,\n    \"avg_ts\": 39.928787,\n    \"stddev_ts\": 0.050349,\n    \"samples_ns\": [ 3210381109, 3203314035, 3203436634 ],\n    \"samples_ts\": [ 39.8707, 39.9586, 39.9571 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:51:40Z\",\n    \"avg_ns\": 34010607226,\n    \"stddev_ns\": 13433602,\n    \"avg_ts\": 15.054129,\n    \"stddev_ts\": 0.005946,\n    \"samples_ns\": [ 34024235433, 33997377076, 34010209169 ],\n    \"samples_ts\": [ 15.0481, 15.06, 15.0543 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:51:27Z",
+          "avg_ns": 3205710592,
+          "stddev_ns": 4046042,
+          "avg_ts": 39.928787,
+          "stddev_ts": 0.050349,
+          "samples_ns": [
+            3210381109,
+            3203314035,
+            3203436634
+          ],
+          "samples_ts": [
+            39.8707,
+            39.9586,
+            39.9571
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:51:40Z",
+          "avg_ns": 34010607226,
+          "stddev_ns": 13433602,
+          "avg_ts": 15.054129,
+          "stddev_ts": 0.005946,
+          "samples_ns": [
+            34024235433,
+            33997377076,
+            34010209169
+          ],
+          "samples_ts": [
+            15.0481,
+            15.06,
+            15.0543
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 777
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:54:40.966797+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:53:23Z\",\n    \"avg_ns\": 13146391910,\n    \"stddev_ns\": 1451842,\n    \"avg_ts\": 38.946048,\n    \"stddev_ts\": 0.004301,\n    \"samples_ns\": [ 13148040830, 13145829449, 13145305451 ],\n    \"samples_ts\": [ 38.9412, 38.9477, 38.9493 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:54:15Z\",\n    \"avg_ns\": 8320969203,\n    \"stddev_ns\": 765648,\n    \"avg_ts\": 15.382824,\n    \"stddev_ts\": 0.001416,\n    \"samples_ns\": [ 8321424942, 8320085251, 8321397416 ],\n    \"samples_ts\": [ 15.382, 15.3845, 15.382 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:53:23Z",
+          "avg_ns": 13146391910,
+          "stddev_ns": 1451842,
+          "avg_ts": 38.946048,
+          "stddev_ts": 0.004301,
+          "samples_ns": [
+            13148040830,
+            13145829449,
+            13145305451
+          ],
+          "samples_ts": [
+            38.9412,
+            38.9477,
+            38.9493
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:54:15Z",
+          "avg_ns": 8320969203,
+          "stddev_ns": 765648,
+          "avg_ts": 15.382824,
+          "stddev_ts": 0.001416,
+          "samples_ns": [
+            8321424942,
+            8320085251,
+            8321397416
+          ],
+          "samples_ts": [
+            15.382,
+            15.3845,
+            15.382
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 778
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:57:17.292286+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:54:42Z\",\n    \"avg_ns\": 13329747887,\n    \"stddev_ns\": 1774446,\n    \"avg_ts\": 38.410329,\n    \"stddev_ts\": 0.005102,\n    \"samples_ns\": [ 13328656859, 13331790924, 13328795879 ],\n    \"samples_ts\": [ 38.4135, 38.4044, 38.4131 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:55:35Z\",\n    \"avg_ns\": 33927118317,\n    \"stddev_ns\": 7548546,\n    \"avg_ts\": 15.091173,\n    \"stddev_ts\": 0.003357,\n    \"samples_ns\": [ 33935755646, 33923813537, 33921785768 ],\n    \"samples_ts\": [ 15.0873, 15.0926, 15.0935 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:54:42Z",
+          "avg_ns": 13329747887,
+          "stddev_ns": 1774446,
+          "avg_ts": 38.410329,
+          "stddev_ts": 0.005102,
+          "samples_ns": [
+            13328656859,
+            13331790924,
+            13328795879
+          ],
+          "samples_ts": [
+            38.4135,
+            38.4044,
+            38.4131
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:55:35Z",
+          "avg_ns": 33927118317,
+          "stddev_ns": 7548546,
+          "avg_ts": 15.091173,
+          "stddev_ts": 0.003357,
+          "samples_ns": [
+            33935755646,
+            33923813537,
+            33921785768
+          ],
+          "samples_ts": [
+            15.0873,
+            15.0926,
+            15.0935
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 779
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:57:56.251790+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:57:18Z\",\n    \"avg_ns\": 3203345595,\n    \"stddev_ns\": 188729,\n    \"avg_ts\": 39.958224,\n    \"stddev_ts\": 0.002354,\n    \"samples_ns\": [ 3203242036, 3203231316, 3203563433 ],\n    \"samples_ts\": [ 39.9595, 39.9596, 39.9555 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:57:31Z\",\n    \"avg_ns\": 8335573959,\n    \"stddev_ns\": 7647930,\n    \"avg_ts\": 15.355880,\n    \"stddev_ts\": 0.014081,\n    \"samples_ns\": [ 8344404112, 8331220644, 8331097122 ],\n    \"samples_ts\": [ 15.3396, 15.3639, 15.3641 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:57:18Z",
+          "avg_ns": 3203345595,
+          "stddev_ns": 188729,
+          "avg_ts": 39.958224,
+          "stddev_ts": 0.002354,
+          "samples_ns": [
+            3203242036,
+            3203231316,
+            3203563433
+          ],
+          "samples_ts": [
+            39.9595,
+            39.9596,
+            39.9555
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:57:31Z",
+          "avg_ns": 8335573959,
+          "stddev_ns": 7647930,
+          "avg_ts": 15.35588,
+          "stddev_ts": 0.014081,
+          "samples_ns": [
+            8344404112,
+            8331220644,
+            8331097122
+          ],
+          "samples_ts": [
+            15.3396,
+            15.3639,
+            15.3641
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 780
+    },
+    {
+      "timestamp_utc": "2025-12-09T13:59:51.099838+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:57:57Z\",\n    \"avg_ns\": 3205980486,\n    \"stddev_ns\": 2444837,\n    \"avg_ts\": 39.925399,\n    \"stddev_ts\": 0.030425,\n    \"samples_ns\": [ 3204572725, 3208802782, 3204565952 ],\n    \"samples_ts\": [ 39.9429, 39.8903, 39.943 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:58:10Z\",\n    \"avg_ns\": 33628318517,\n    \"stddev_ns\": 6131019,\n    \"avg_ts\": 15.225264,\n    \"stddev_ts\": 0.002775,\n    \"samples_ns\": [ 33634486203, 33628238923, 33622230426 ],\n    \"samples_ts\": [ 15.2225, 15.2253, 15.228 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:57:57Z",
+          "avg_ns": 3205980486,
+          "stddev_ns": 2444837,
+          "avg_ts": 39.925399,
+          "stddev_ts": 0.030425,
+          "samples_ns": [
+            3204572725,
+            3208802782,
+            3204565952
+          ],
+          "samples_ts": [
+            39.9429,
+            39.8903,
+            39.943
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:58:10Z",
+          "avg_ns": 33628318517,
+          "stddev_ns": 6131019,
+          "avg_ts": 15.225264,
+          "stddev_ts": 0.002775,
+          "samples_ns": [
+            33634486203,
+            33628238923,
+            33622230426
+          ],
+          "samples_ts": [
+            15.2225,
+            15.2253,
+            15.228
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 781
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:01:09.103111+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T13:59:52Z\",\n    \"avg_ns\": 12906931517,\n    \"stddev_ns\": 2313595,\n    \"avg_ts\": 39.668608,\n    \"stddev_ts\": 0.007110,\n    \"samples_ns\": [ 12909602999, 12905606419, 12905585133 ],\n    \"samples_ts\": [ 39.6604, 39.6727, 39.6727 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:00:43Z\",\n    \"avg_ns\": 8410714302,\n    \"stddev_ns\": 3572149,\n    \"avg_ts\": 15.218685,\n    \"stddev_ts\": 0.006460,\n    \"samples_ns\": [ 8414781351, 8409269148, 8408092408 ],\n    \"samples_ts\": [ 15.2113, 15.2213, 15.2234 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T13:59:52Z",
+          "avg_ns": 12906931517,
+          "stddev_ns": 2313595,
+          "avg_ts": 39.668608,
+          "stddev_ts": 0.00711,
+          "samples_ns": [
+            12909602999,
+            12905606419,
+            12905585133
+          ],
+          "samples_ts": [
+            39.6604,
+            39.6727,
+            39.6727
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:00:43Z",
+          "avg_ns": 8410714302,
+          "stddev_ns": 3572149,
+          "avg_ts": 15.218685,
+          "stddev_ts": 0.00646,
+          "samples_ns": [
+            8414781351,
+            8409269148,
+            8408092408
+          ],
+          "samples_ts": [
+            15.2113,
+            15.2213,
+            15.2234
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 782
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:03:43.109985+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:01:10Z\",\n    \"avg_ns\": 12911351913,\n    \"stddev_ns\": 749420,\n    \"avg_ts\": 39.655026,\n    \"stddev_ts\": 0.002275,\n    \"samples_ns\": [ 12911632845, 12910511785, 12911911110 ],\n    \"samples_ts\": [ 39.6542, 39.6576, 39.6533 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:02:01Z\",\n    \"avg_ns\": 33736190869,\n    \"stddev_ns\": 3681061,\n    \"avg_ts\": 15.176580,\n    \"stddev_ts\": 0.001654,\n    \"samples_ns\": [ 33740124302, 33735607107, 33732841199 ],\n    \"samples_ts\": [ 15.1748, 15.1768, 15.1781 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:01:10Z",
+          "avg_ns": 12911351913,
+          "stddev_ns": 749420,
+          "avg_ts": 39.655026,
+          "stddev_ts": 0.002275,
+          "samples_ns": [
+            12911632845,
+            12910511785,
+            12911911110
+          ],
+          "samples_ts": [
+            39.6542,
+            39.6576,
+            39.6533
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:02:01Z",
+          "avg_ns": 33736190869,
+          "stddev_ns": 3681061,
+          "avg_ts": 15.17658,
+          "stddev_ts": 0.001654,
+          "samples_ns": [
+            33740124302,
+            33735607107,
+            33732841199
+          ],
+          "samples_ts": [
+            15.1748,
+            15.1768,
+            15.1781
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 783
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:04:22.069545+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:03:44Z\",\n    \"avg_ns\": 3205073478,\n    \"stddev_ns\": 267460,\n    \"avg_ts\": 39.936682,\n    \"stddev_ts\": 0.003257,\n    \"samples_ns\": [ 3204873091, 3205369155, 3204978189 ],\n    \"samples_ts\": [ 39.9392, 39.933, 39.9379 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:03:56Z\",\n    \"avg_ns\": 8331283704,\n    \"stddev_ns\": 5552249,\n    \"avg_ts\": 15.363783,\n    \"stddev_ts\": 0.010240,\n    \"samples_ns\": [ 8336544787, 8325480115, 8331826210 ],\n    \"samples_ts\": [ 15.3541, 15.3745, 15.3628 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:03:44Z",
+          "avg_ns": 3205073478,
+          "stddev_ns": 267460,
+          "avg_ts": 39.936682,
+          "stddev_ts": 0.003257,
+          "samples_ns": [
+            3204873091,
+            3205369155,
+            3204978189
+          ],
+          "samples_ts": [
+            39.9392,
+            39.933,
+            39.9379
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:03:56Z",
+          "avg_ns": 8331283704,
+          "stddev_ns": 5552249,
+          "avg_ts": 15.363783,
+          "stddev_ts": 0.01024,
+          "samples_ns": [
+            8336544787,
+            8325480115,
+            8331826210
+          ],
+          "samples_ts": [
+            15.3541,
+            15.3745,
+            15.3628
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 784
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:06:16.876395+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:04:23Z\",\n    \"avg_ns\": 3203324882,\n    \"stddev_ns\": 187313,\n    \"avg_ts\": 39.958482,\n    \"stddev_ts\": 0.002337,\n    \"samples_ns\": [ 3203338980, 3203504748, 3203130918 ],\n    \"samples_ts\": [ 39.9583, 39.9562, 39.9609 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:04:35Z\",\n    \"avg_ns\": 33614816694,\n    \"stddev_ns\": 14559129,\n    \"avg_ts\": 15.231381,\n    \"stddev_ts\": 0.006598,\n    \"samples_ns\": [ 33623973378, 33598029708, 33622446997 ],\n    \"samples_ts\": [ 15.2272, 15.239, 15.2279 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:04:23Z",
+          "avg_ns": 3203324882,
+          "stddev_ns": 187313,
+          "avg_ts": 39.958482,
+          "stddev_ts": 0.002337,
+          "samples_ns": [
+            3203338980,
+            3203504748,
+            3203130918
+          ],
+          "samples_ts": [
+            39.9583,
+            39.9562,
+            39.9609
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:04:35Z",
+          "avg_ns": 33614816694,
+          "stddev_ns": 14559129,
+          "avg_ts": 15.231381,
+          "stddev_ts": 0.006598,
+          "samples_ns": [
+            33623973378,
+            33598029708,
+            33622446997
+          ],
+          "samples_ts": [
+            15.2272,
+            15.239,
+            15.2279
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 785
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:07:34.777890+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:06:17Z\",\n    \"avg_ns\": 12932797034,\n    \"stddev_ns\": 1489188,\n    \"avg_ts\": 39.589271,\n    \"stddev_ts\": 0.004558,\n    \"samples_ns\": [ 12932129039, 12934503263, 12931758800 ],\n    \"samples_ts\": [ 39.5913, 39.584, 39.5924 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:07:09Z\",\n    \"avg_ns\": 8340343850,\n    \"stddev_ns\": 979488,\n    \"avg_ts\": 15.347089,\n    \"stddev_ts\": 0.001787,\n    \"samples_ns\": [ 8340278639, 8341345749, 8339407164 ],\n    \"samples_ts\": [ 15.3472, 15.3452, 15.3488 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:06:17Z",
+          "avg_ns": 12932797034,
+          "stddev_ns": 1489188,
+          "avg_ts": 39.589271,
+          "stddev_ts": 0.004558,
+          "samples_ns": [
+            12932129039,
+            12934503263,
+            12931758800
+          ],
+          "samples_ts": [
+            39.5913,
+            39.584,
+            39.5924
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:07:09Z",
+          "avg_ns": 8340343850,
+          "stddev_ns": 979488,
+          "avg_ts": 15.347089,
+          "stddev_ts": 0.001787,
+          "samples_ns": [
+            8340278639,
+            8341345749,
+            8339407164
+          ],
+          "samples_ts": [
+            15.3472,
+            15.3452,
+            15.3488
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 786
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:10:08.736829+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:07:35Z\",\n    \"avg_ns\": 12937456020,\n    \"stddev_ns\": 3477362,\n    \"avg_ts\": 39.575016,\n    \"stddev_ts\": 0.010636,\n    \"samples_ns\": [ 12941287978, 12934501233, 12936578849 ],\n    \"samples_ts\": [ 39.5633, 39.5841, 39.5777 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:08:27Z\",\n    \"avg_ns\": 33687651114,\n    \"stddev_ns\": 7480618,\n    \"avg_ts\": 15.198448,\n    \"stddev_ts\": 0.003375,\n    \"samples_ns\": [ 33693196724, 33690613665, 33679142953 ],\n    \"samples_ts\": [ 15.1959, 15.1971, 15.2023 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:07:35Z",
+          "avg_ns": 12937456020,
+          "stddev_ns": 3477362,
+          "avg_ts": 39.575016,
+          "stddev_ts": 0.010636,
+          "samples_ns": [
+            12941287978,
+            12934501233,
+            12936578849
+          ],
+          "samples_ts": [
+            39.5633,
+            39.5841,
+            39.5777
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:08:27Z",
+          "avg_ns": 33687651114,
+          "stddev_ns": 7480618,
+          "avg_ts": 15.198448,
+          "stddev_ts": 0.003375,
+          "samples_ns": [
+            33693196724,
+            33690613665,
+            33679142953
+          ],
+          "samples_ts": [
+            15.1959,
+            15.1971,
+            15.2023
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 787
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:10:47.773907+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:10:09Z\",\n    \"avg_ns\": 3203467953,\n    \"stddev_ns\": 224377,\n    \"avg_ts\": 39.956698,\n    \"stddev_ts\": 0.002614,\n    \"samples_ns\": [ 3203689287, 3203272451, 3203442123 ],\n    \"samples_ts\": [ 39.9539, 39.9591, 39.957 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:10:22Z\",\n    \"avg_ns\": 8344326226,\n    \"stddev_ns\": 6851209,\n    \"avg_ts\": 15.339771,\n    \"stddev_ts\": 0.012589,\n    \"samples_ns\": [ 8340316875, 8352237076, 8340424727 ],\n    \"samples_ts\": [ 15.3471, 15.3252, 15.3469 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:10:09Z",
+          "avg_ns": 3203467953,
+          "stddev_ns": 224377,
+          "avg_ts": 39.956698,
+          "stddev_ts": 0.002614,
+          "samples_ns": [
+            3203689287,
+            3203272451,
+            3203442123
+          ],
+          "samples_ts": [
+            39.9539,
+            39.9591,
+            39.957
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:10:22Z",
+          "avg_ns": 8344326226,
+          "stddev_ns": 6851209,
+          "avg_ts": 15.339771,
+          "stddev_ts": 0.012589,
+          "samples_ns": [
+            8340316875,
+            8352237076,
+            8340424727
+          ],
+          "samples_ts": [
+            15.3471,
+            15.3252,
+            15.3469
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 788
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:12:43.307732+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:10:48Z\",\n    \"avg_ns\": 3201567158,\n    \"stddev_ns\": 164724,\n    \"avg_ts\": 39.980420,\n    \"stddev_ts\": 0.001932,\n    \"samples_ns\": [ 3201665438, 3201647202, 3201388835 ],\n    \"samples_ts\": [ 39.9792, 39.9794, 39.9826 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:11:01Z\",\n    \"avg_ns\": 33859155449,\n    \"stddev_ns\": 16761405,\n    \"avg_ts\": 15.121466,\n    \"stddev_ts\": 0.007484,\n    \"samples_ns\": [ 33878417667, 33851158336, 33847890344 ],\n    \"samples_ts\": [ 15.1129, 15.125, 15.1265 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:10:48Z",
+          "avg_ns": 3201567158,
+          "stddev_ns": 164724,
+          "avg_ts": 39.98042,
+          "stddev_ts": 0.001932,
+          "samples_ns": [
+            3201665438,
+            3201647202,
+            3201388835
+          ],
+          "samples_ts": [
+            39.9792,
+            39.9794,
+            39.9826
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:11:01Z",
+          "avg_ns": 33859155449,
+          "stddev_ns": 16761405,
+          "avg_ts": 15.121466,
+          "stddev_ts": 0.007484,
+          "samples_ns": [
+            33878417667,
+            33851158336,
+            33847890344
+          ],
+          "samples_ts": [
+            15.1129,
+            15.125,
+            15.1265
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 789
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:14:01.988995+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:12:44Z\",\n    \"avg_ns\": 13141329446,\n    \"stddev_ns\": 650987,\n    \"avg_ts\": 38.961051,\n    \"stddev_ts\": 0.001869,\n    \"samples_ns\": [ 13140634930, 13141487661, 13141865749 ],\n    \"samples_ts\": [ 38.9631, 38.9606, 38.9595 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:13:36Z\",\n    \"avg_ns\": 8317873143,\n    \"stddev_ns\": 2102155,\n    \"avg_ts\": 15.388550,\n    \"stddev_ts\": 0.003881,\n    \"samples_ns\": [ 8317354753, 8316082735, 8320181943 ],\n    \"samples_ts\": [ 15.3895, 15.3919, 15.3843 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:12:44Z",
+          "avg_ns": 13141329446,
+          "stddev_ns": 650987,
+          "avg_ts": 38.961051,
+          "stddev_ts": 0.001869,
+          "samples_ns": [
+            13140634930,
+            13141487661,
+            13141865749
+          ],
+          "samples_ts": [
+            38.9631,
+            38.9606,
+            38.9595
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:13:36Z",
+          "avg_ns": 8317873143,
+          "stddev_ns": 2102155,
+          "avg_ts": 15.38855,
+          "stddev_ts": 0.003881,
+          "samples_ns": [
+            8317354753,
+            8316082735,
+            8320181943
+          ],
+          "samples_ts": [
+            15.3895,
+            15.3919,
+            15.3843
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 790
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:16:37.506704+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:14:02Z\",\n    \"avg_ns\": 13145794172,\n    \"stddev_ns\": 2877298,\n    \"avg_ts\": 38.947819,\n    \"stddev_ts\": 0.008510,\n    \"samples_ns\": [ 13149033096, 13143554560, 13144794862 ],\n    \"samples_ts\": [ 38.9382, 38.9545, 38.9508 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:14:55Z\",\n    \"avg_ns\": 33927502723,\n    \"stddev_ns\": 5146920,\n    \"avg_ts\": 15.091002,\n    \"stddev_ts\": 0.002286,\n    \"samples_ns\": [ 33925405637, 33933360072, 33923742462 ],\n    \"samples_ts\": [ 15.0919, 15.0884, 15.0927 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:14:02Z",
+          "avg_ns": 13145794172,
+          "stddev_ns": 2877298,
+          "avg_ts": 38.947819,
+          "stddev_ts": 0.00851,
+          "samples_ns": [
+            13149033096,
+            13143554560,
+            13144794862
+          ],
+          "samples_ts": [
+            38.9382,
+            38.9545,
+            38.9508
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:14:55Z",
+          "avg_ns": 33927502723,
+          "stddev_ns": 5146920,
+          "avg_ts": 15.091002,
+          "stddev_ts": 0.002286,
+          "samples_ns": [
+            33925405637,
+            33933360072,
+            33923742462
+          ],
+          "samples_ts": [
+            15.0919,
+            15.0884,
+            15.0927
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 791
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:17:11.289159+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:16:38Z\",\n    \"avg_ns\": 2207512967,\n    \"stddev_ns\": 1673964,\n    \"avg_ts\": 57.983827,\n    \"stddev_ts\": 0.043917,\n    \"samples_ns\": [ 2209421655, 2206302838, 2206814410 ],\n    \"samples_ts\": [ 57.9337, 58.0156, 58.0022 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:16:47Z\",\n    \"avg_ns\": 7929725840,\n    \"stddev_ns\": 7158562,\n    \"avg_ts\": 16.141803,\n    \"stddev_ts\": 0.014565,\n    \"samples_ns\": [ 7926122418, 7937970107, 7925084995 ],\n    \"samples_ts\": [ 16.1491, 16.125, 16.1512 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:16:38Z",
+          "avg_ns": 2207512967,
+          "stddev_ns": 1673964,
+          "avg_ts": 57.983827,
+          "stddev_ts": 0.043917,
+          "samples_ns": [
+            2209421655,
+            2206302838,
+            2206814410
+          ],
+          "samples_ts": [
+            57.9337,
+            58.0156,
+            58.0022
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:16:47Z",
+          "avg_ns": 7929725840,
+          "stddev_ns": 7158562,
+          "avg_ts": 16.141803,
+          "stddev_ts": 0.014565,
+          "samples_ns": [
+            7926122418,
+            7937970107,
+            7925084995
+          ],
+          "samples_ts": [
+            16.1491,
+            16.125,
+            16.1512
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 792
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:18:57.539277+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:17:12Z\",\n    \"avg_ns\": 2204686031,\n    \"stddev_ns\": 379107,\n    \"avg_ts\": 58.058155,\n    \"stddev_ts\": 0.009984,\n    \"samples_ns\": [ 2204728218, 2204287595, 2205042280 ],\n    \"samples_ts\": [ 58.057, 58.0686, 58.0488 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:17:21Z\",\n    \"avg_ns\": 32079330823,\n    \"stddev_ns\": 52446493,\n    \"avg_ts\": 15.960461,\n    \"stddev_ts\": 0.026111,\n    \"samples_ns\": [ 32122617180, 32094366701, 32021008588 ],\n    \"samples_ts\": [ 15.9389, 15.953, 15.9895 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:17:12Z",
+          "avg_ns": 2204686031,
+          "stddev_ns": 379107,
+          "avg_ts": 58.058155,
+          "stddev_ts": 0.009984,
+          "samples_ns": [
+            2204728218,
+            2204287595,
+            2205042280
+          ],
+          "samples_ts": [
+            58.057,
+            58.0686,
+            58.0488
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:17:21Z",
+          "avg_ns": 32079330823,
+          "stddev_ns": 52446493,
+          "avg_ts": 15.960461,
+          "stddev_ts": 0.026111,
+          "samples_ns": [
+            32122617180,
+            32094366701,
+            32021008588
+          ],
+          "samples_ts": [
+            15.9389,
+            15.953,
+            15.9895
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 793
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:19:58.040780+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:18:58Z\",\n    \"avg_ns\": 8872068291,\n    \"stddev_ns\": 5543724,\n    \"avg_ts\": 57.709219,\n    \"stddev_ts\": 0.036039,\n    \"samples_ns\": [ 8867696530, 8878301507, 8870206838 ],\n    \"samples_ts\": [ 57.7377, 57.6687, 57.7213 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:19:34Z\",\n    \"avg_ns\": 7959716308,\n    \"stddev_ns\": 530619,\n    \"avg_ts\": 16.080975,\n    \"stddev_ts\": 0.001041,\n    \"samples_ns\": [ 7959641852, 7959242186, 7960264888 ],\n    \"samples_ts\": [ 16.0811, 16.0819, 16.0799 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:18:58Z",
+          "avg_ns": 8872068291,
+          "stddev_ns": 5543724,
+          "avg_ts": 57.709219,
+          "stddev_ts": 0.036039,
+          "samples_ns": [
+            8867696530,
+            8878301507,
+            8870206838
+          ],
+          "samples_ts": [
+            57.7377,
+            57.6687,
+            57.7213
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:19:34Z",
+          "avg_ns": 7959716308,
+          "stddev_ns": 530619,
+          "avg_ts": 16.080975,
+          "stddev_ts": 0.001041,
+          "samples_ns": [
+            7959641852,
+            7959242186,
+            7960264888
+          ],
+          "samples_ts": [
+            16.0811,
+            16.0819,
+            16.0799
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 794
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:22:11.030102+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:19:59Z\",\n    \"avg_ns\": 8876333939,\n    \"stddev_ns\": 18136911,\n    \"avg_ts\": 57.681632,\n    \"stddev_ts\": 0.117722,\n    \"samples_ns\": [ 8864870191, 8866887686, 8897243941 ],\n    \"samples_ts\": [ 57.7561, 57.7429, 57.5459 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:20:34Z\",\n    \"avg_ns\": 32109879031,\n    \"stddev_ns\": 24764893,\n    \"avg_ts\": 15.945255,\n    \"stddev_ts\": 0.012292,\n    \"samples_ns\": [ 32138053506, 32100020860, 32091562729 ],\n    \"samples_ts\": [ 15.9313, 15.9501, 15.9543 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:19:59Z",
+          "avg_ns": 8876333939,
+          "stddev_ns": 18136911,
+          "avg_ts": 57.681632,
+          "stddev_ts": 0.117722,
+          "samples_ns": [
+            8864870191,
+            8866887686,
+            8897243941
+          ],
+          "samples_ts": [
+            57.7561,
+            57.7429,
+            57.5459
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:20:34Z",
+          "avg_ns": 32109879031,
+          "stddev_ns": 24764893,
+          "avg_ts": 15.945255,
+          "stddev_ts": 0.012292,
+          "samples_ns": [
+            32138053506,
+            32100020860,
+            32091562729
+          ],
+          "samples_ts": [
+            15.9313,
+            15.9501,
+            15.9543
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 795
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:22:44.861896+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:22:12Z\",\n    \"avg_ns\": 2206999251,\n    \"stddev_ns\": 239406,\n    \"avg_ts\": 57.997301,\n    \"stddev_ts\": 0.006169,\n    \"samples_ns\": [ 2207036287, 2207213284, 2206748183 ],\n    \"samples_ts\": [ 57.9963, 57.9917, 58.0039 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:22:20Z\",\n    \"avg_ns\": 7955939778,\n    \"stddev_ns\": 7821196,\n    \"avg_ts\": 16.088619,\n    \"stddev_ts\": 0.015817,\n    \"samples_ns\": [ 7956091194, 7948043973, 7963684167 ],\n    \"samples_ts\": [ 16.0883, 16.1046, 16.073 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:22:12Z",
+          "avg_ns": 2206999251,
+          "stddev_ns": 239406,
+          "avg_ts": 57.997301,
+          "stddev_ts": 0.006169,
+          "samples_ns": [
+            2207036287,
+            2207213284,
+            2206748183
+          ],
+          "samples_ts": [
+            57.9963,
+            57.9917,
+            58.0039
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:22:20Z",
+          "avg_ns": 7955939778,
+          "stddev_ns": 7821196,
+          "avg_ts": 16.088619,
+          "stddev_ts": 0.015817,
+          "samples_ns": [
+            7956091194,
+            7948043973,
+            7963684167
+          ],
+          "samples_ts": [
+            16.0883,
+            16.1046,
+            16.073
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 796
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:24:31.097235+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:22:45Z\",\n    \"avg_ns\": 2206383971,\n    \"stddev_ns\": 576728,\n    \"avg_ts\": 58.013477,\n    \"stddev_ts\": 0.015164,\n    \"samples_ns\": [ 2206980005, 2205828713, 2206343195 ],\n    \"samples_ts\": [ 57.9978, 58.0281, 58.0145 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:22:54Z\",\n    \"avg_ns\": 32088661910,\n    \"stddev_ns\": 14897546,\n    \"avg_ts\": 15.955794,\n    \"stddev_ts\": 0.007406,\n    \"samples_ns\": [ 32105805783, 32081315781, 32078864166 ],\n    \"samples_ts\": [ 15.9473, 15.9594, 15.9607 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:22:45Z",
+          "avg_ns": 2206383971,
+          "stddev_ns": 576728,
+          "avg_ts": 58.013477,
+          "stddev_ts": 0.015164,
+          "samples_ns": [
+            2206980005,
+            2205828713,
+            2206343195
+          ],
+          "samples_ts": [
+            57.9978,
+            58.0281,
+            58.0145
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:22:54Z",
+          "avg_ns": 32088661910,
+          "stddev_ns": 14897546,
+          "avg_ts": 15.955794,
+          "stddev_ts": 0.007406,
+          "samples_ns": [
+            32105805783,
+            32081315781,
+            32078864166
+          ],
+          "samples_ts": [
+            15.9473,
+            15.9594,
+            15.9607
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 797
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:25:31.680758+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:24:32Z\",\n    \"avg_ns\": 8878095563,\n    \"stddev_ns\": 3910458,\n    \"avg_ts\": 57.670033,\n    \"stddev_ts\": 0.025389,\n    \"samples_ns\": [ 8878736123, 8881643900, 8873906668 ],\n    \"samples_ts\": [ 57.6659, 57.647, 57.6972 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:25:07Z\",\n    \"avg_ns\": 7960182539,\n    \"stddev_ns\": 4143793,\n    \"avg_ts\": 16.080036,\n    \"stddev_ts\": 0.008369,\n    \"samples_ns\": [ 7964246071, 7960336831, 7955964716 ],\n    \"samples_ts\": [ 16.0718, 16.0797, 16.0886 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:24:32Z",
+          "avg_ns": 8878095563,
+          "stddev_ns": 3910458,
+          "avg_ts": 57.670033,
+          "stddev_ts": 0.025389,
+          "samples_ns": [
+            8878736123,
+            8881643900,
+            8873906668
+          ],
+          "samples_ts": [
+            57.6659,
+            57.647,
+            57.6972
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:25:07Z",
+          "avg_ns": 7960182539,
+          "stddev_ns": 4143793,
+          "avg_ts": 16.080036,
+          "stddev_ts": 0.008369,
+          "samples_ns": [
+            7964246071,
+            7960336831,
+            7955964716
+          ],
+          "samples_ts": [
+            16.0718,
+            16.0797,
+            16.0886
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 798
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:27:45.127231+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:25:32Z\",\n    \"avg_ns\": 8875567966,\n    \"stddev_ns\": 2896547,\n    \"avg_ts\": 57.686453,\n    \"stddev_ts\": 0.018823,\n    \"samples_ns\": [ 8874565628, 8878832552, 8873305718 ],\n    \"samples_ts\": [ 57.693, 57.6652, 57.7012 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:26:08Z\",\n    \"avg_ns\": 32264726894,\n    \"stddev_ns\": 5837779,\n    \"avg_ts\": 15.868723,\n    \"stddev_ts\": 0.002870,\n    \"samples_ns\": [ 32266434886, 32269517319, 32258228478 ],\n    \"samples_ts\": [ 15.8679, 15.8664, 15.8719 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:25:32Z",
+          "avg_ns": 8875567966,
+          "stddev_ns": 2896547,
+          "avg_ts": 57.686453,
+          "stddev_ts": 0.018823,
+          "samples_ns": [
+            8874565628,
+            8878832552,
+            8873305718
+          ],
+          "samples_ts": [
+            57.693,
+            57.6652,
+            57.7012
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:26:08Z",
+          "avg_ns": 32264726894,
+          "stddev_ns": 5837779,
+          "avg_ts": 15.868723,
+          "stddev_ts": 0.00287,
+          "samples_ns": [
+            32266434886,
+            32269517319,
+            32258228478
+          ],
+          "samples_ts": [
+            15.8679,
+            15.8664,
+            15.8719
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 799
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:28:18.986803+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:27:46Z\",\n    \"avg_ns\": 2204303485,\n    \"stddev_ns\": 1161952,\n    \"avg_ts\": 58.068240,\n    \"stddev_ts\": 0.030607,\n    \"samples_ns\": [ 2204176893, 2203210012, 2205523550 ],\n    \"samples_ts\": [ 58.0716, 58.097, 58.0361 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:27:54Z\",\n    \"avg_ns\": 7961093977,\n    \"stddev_ns\": 7289634,\n    \"avg_ts\": 16.078201,\n    \"stddev_ts\": 0.014714,\n    \"samples_ns\": [ 7958695808, 7969279438, 7955306687 ],\n    \"samples_ts\": [ 16.083, 16.0617, 16.0899 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:27:46Z",
+          "avg_ns": 2204303485,
+          "stddev_ns": 1161952,
+          "avg_ts": 58.06824,
+          "stddev_ts": 0.030607,
+          "samples_ns": [
+            2204176893,
+            2203210012,
+            2205523550
+          ],
+          "samples_ts": [
+            58.0716,
+            58.097,
+            58.0361
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:27:54Z",
+          "avg_ns": 7961093977,
+          "stddev_ns": 7289634,
+          "avg_ts": 16.078201,
+          "stddev_ts": 0.014714,
+          "samples_ns": [
+            7958695808,
+            7969279438,
+            7955306687
+          ],
+          "samples_ts": [
+            16.083,
+            16.0617,
+            16.0899
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 800
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:30:04.983636+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:28:19Z\",\n    \"avg_ns\": 2204795804,\n    \"stddev_ns\": 343694,\n    \"avg_ts\": 58.055264,\n    \"stddev_ts\": 0.008879,\n    \"samples_ns\": [ 2205146705, 2204766533, 2204474176 ],\n    \"samples_ts\": [ 58.046, 58.056, 58.0637 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:28:28Z\",\n    \"avg_ns\": 32003679016,\n    \"stddev_ns\": 21989078,\n    \"avg_ts\": 15.998166,\n    \"stddev_ts\": 0.010987,\n    \"samples_ns\": [ 32028944119, 31993224422, 31988868508 ],\n    \"samples_ts\": [ 15.9855, 16.0034, 16.0056 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:28:19Z",
+          "avg_ns": 2204795804,
+          "stddev_ns": 343694,
+          "avg_ts": 58.055264,
+          "stddev_ts": 0.008879,
+          "samples_ns": [
+            2205146705,
+            2204766533,
+            2204474176
+          ],
+          "samples_ts": [
+            58.046,
+            58.056,
+            58.0637
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:28:28Z",
+          "avg_ns": 32003679016,
+          "stddev_ns": 21989078,
+          "avg_ts": 15.998166,
+          "stddev_ts": 0.010987,
+          "samples_ns": [
+            32028944119,
+            31993224422,
+            31988868508
+          ],
+          "samples_ts": [
+            15.9855,
+            16.0034,
+            16.0056
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 801
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:31:06.321042+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:30:05Z\",\n    \"avg_ns\": 9069210350,\n    \"stddev_ns\": 5316900,\n    \"avg_ts\": 56.454763,\n    \"stddev_ts\": 0.033104,\n    \"samples_ns\": [ 9073750715, 9070519029, 9063361306 ],\n    \"samples_ts\": [ 56.4265, 56.4466, 56.4912 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:30:42Z\",\n    \"avg_ns\": 7964512742,\n    \"stddev_ns\": 1151952,\n    \"avg_ts\": 16.071291,\n    \"stddev_ts\": 0.002311,\n    \"samples_ns\": [ 7964572114, 7963339194, 7965626920 ],\n    \"samples_ts\": [ 16.0712, 16.0737, 16.069 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:30:05Z",
+          "avg_ns": 9069210350,
+          "stddev_ns": 5316900,
+          "avg_ts": 56.454763,
+          "stddev_ts": 0.033104,
+          "samples_ns": [
+            9073750715,
+            9070519029,
+            9063361306
+          ],
+          "samples_ts": [
+            56.4265,
+            56.4466,
+            56.4912
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:30:42Z",
+          "avg_ns": 7964512742,
+          "stddev_ns": 1151952,
+          "avg_ts": 16.071291,
+          "stddev_ts": 0.002311,
+          "samples_ns": [
+            7964572114,
+            7963339194,
+            7965626920
+          ],
+          "samples_ts": [
+            16.0712,
+            16.0737,
+            16.069
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 802
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:33:20.465884+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:31:07Z\",\n    \"avg_ns\": 9064604509,\n    \"stddev_ns\": 654874,\n    \"avg_ts\": 56.483435,\n    \"stddev_ts\": 0.004081,\n    \"samples_ns\": [ 9063995662, 9064520551, 9065297314 ],\n    \"samples_ts\": [ 56.4872, 56.484, 56.4791 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:31:43Z\",\n    \"avg_ns\": 32245000768,\n    \"stddev_ns\": 6853404,\n    \"avg_ts\": 15.878431,\n    \"stddev_ts\": 0.003375,\n    \"samples_ns\": [ 32242944400, 32252646933, 32239410971 ],\n    \"samples_ts\": [ 15.8794, 15.8747, 15.8812 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:31:07Z",
+          "avg_ns": 9064604509,
+          "stddev_ns": 654874,
+          "avg_ts": 56.483435,
+          "stddev_ts": 0.004081,
+          "samples_ns": [
+            9063995662,
+            9064520551,
+            9065297314
+          ],
+          "samples_ts": [
+            56.4872,
+            56.484,
+            56.4791
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:31:43Z",
+          "avg_ns": 32245000768,
+          "stddev_ns": 6853404,
+          "avg_ts": 15.878431,
+          "stddev_ts": 0.003375,
+          "samples_ns": [
+            32242944400,
+            32252646933,
+            32239410971
+          ],
+          "samples_ts": [
+            15.8794,
+            15.8747,
+            15.8812
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 803
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:33:54.439879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:33:21Z\",\n    \"avg_ns\": 2203004015,\n    \"stddev_ns\": 748764,\n    \"avg_ts\": 58.102486,\n    \"stddev_ts\": 0.019674,\n    \"samples_ns\": [ 2202153126, 2203544490, 2203314431 ],\n    \"samples_ts\": [ 58.1249, 58.0882, 58.0943 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:33:30Z\",\n    \"avg_ns\": 7990634749,\n    \"stddev_ns\": 9044458,\n    \"avg_ts\": 16.018766,\n    \"stddev_ts\": 0.018119,\n    \"samples_ns\": [ 8000990910, 7984295332, 7986618007 ],\n    \"samples_ts\": [ 15.998, 16.0315, 16.0268 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:33:21Z",
+          "avg_ns": 2203004015,
+          "stddev_ns": 748764,
+          "avg_ts": 58.102486,
+          "stddev_ts": 0.019674,
+          "samples_ns": [
+            2202153126,
+            2203544490,
+            2203314431
+          ],
+          "samples_ts": [
+            58.1249,
+            58.0882,
+            58.0943
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:33:30Z",
+          "avg_ns": 7990634749,
+          "stddev_ns": 9044458,
+          "avg_ts": 16.018766,
+          "stddev_ts": 0.018119,
+          "samples_ns": [
+            8000990910,
+            7984295332,
+            7986618007
+          ],
+          "samples_ts": [
+            15.998,
+            16.0315,
+            16.0268
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 804
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:35:40.644629+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:33:55Z\",\n    \"avg_ns\": 2202466618,\n    \"stddev_ns\": 5520640,\n    \"avg_ts\": 58.116902,\n    \"stddev_ts\": 0.145566,\n    \"samples_ns\": [ 2197610845, 2201317694, 2208471315 ],\n    \"samples_ts\": [ 58.2451, 58.147, 57.9586 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:34:04Z\",\n    \"avg_ns\": 32070962558,\n    \"stddev_ns\": 72202496,\n    \"avg_ts\": 15.964651,\n    \"stddev_ts\": 0.035988,\n    \"samples_ns\": [ 31987676423, 32115886669, 32109324582 ],\n    \"samples_ts\": [ 16.0062, 15.9423, 15.9455 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:33:55Z",
+          "avg_ns": 2202466618,
+          "stddev_ns": 5520640,
+          "avg_ts": 58.116902,
+          "stddev_ts": 0.145566,
+          "samples_ns": [
+            2197610845,
+            2201317694,
+            2208471315
+          ],
+          "samples_ts": [
+            58.2451,
+            58.147,
+            57.9586
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:34:04Z",
+          "avg_ns": 32070962558,
+          "stddev_ns": 72202496,
+          "avg_ts": 15.964651,
+          "stddev_ts": 0.035988,
+          "samples_ns": [
+            31987676423,
+            32115886669,
+            32109324582
+          ],
+          "samples_ts": [
+            16.0062,
+            15.9423,
+            15.9455
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 805
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:36:41.147172+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:35:41Z\",\n    \"avg_ns\": 8870536730,\n    \"stddev_ns\": 2031337,\n    \"avg_ts\": 57.719170,\n    \"stddev_ts\": 0.013204,\n    \"samples_ns\": [ 8868310480, 8872282564, 8871017147 ],\n    \"samples_ts\": [ 57.7337, 57.7078, 57.716 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:36:17Z\",\n    \"avg_ns\": 7954137726,\n    \"stddev_ns\": 901686,\n    \"avg_ts\": 16.092254,\n    \"stddev_ts\": 0.001815,\n    \"samples_ns\": [ 7954892929, 7953145848, 7954374402 ],\n    \"samples_ts\": [ 16.0907, 16.0943, 16.0918 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:35:41Z",
+          "avg_ns": 8870536730,
+          "stddev_ns": 2031337,
+          "avg_ts": 57.71917,
+          "stddev_ts": 0.013204,
+          "samples_ns": [
+            8868310480,
+            8872282564,
+            8871017147
+          ],
+          "samples_ts": [
+            57.7337,
+            57.7078,
+            57.716
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:36:17Z",
+          "avg_ns": 7954137726,
+          "stddev_ns": 901686,
+          "avg_ts": 16.092254,
+          "stddev_ts": 0.001815,
+          "samples_ns": [
+            7954892929,
+            7953145848,
+            7954374402
+          ],
+          "samples_ts": [
+            16.0907,
+            16.0943,
+            16.0918
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 806
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:38:54.234469+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:36:42Z\",\n    \"avg_ns\": 8873897681,\n    \"stddev_ns\": 3670695,\n    \"avg_ts\": 57.697314,\n    \"stddev_ts\": 0.023861,\n    \"samples_ns\": [ 8878130398, 8871588755, 8871973890 ],\n    \"samples_ts\": [ 57.6698, 57.7123, 57.7098 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:37:17Z\",\n    \"avg_ns\": 32149260048,\n    \"stddev_ns\": 15984354,\n    \"avg_ts\": 15.925719,\n    \"stddev_ts\": 0.007917,\n    \"samples_ns\": [ 32134576770, 32166285099, 32146918276 ],\n    \"samples_ts\": [ 15.933, 15.9173, 15.9269 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:36:42Z",
+          "avg_ns": 8873897681,
+          "stddev_ns": 3670695,
+          "avg_ts": 57.697314,
+          "stddev_ts": 0.023861,
+          "samples_ns": [
+            8878130398,
+            8871588755,
+            8871973890
+          ],
+          "samples_ts": [
+            57.6698,
+            57.7123,
+            57.7098
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:37:17Z",
+          "avg_ns": 32149260048,
+          "stddev_ns": 15984354,
+          "avg_ts": 15.925719,
+          "stddev_ts": 0.007917,
+          "samples_ns": [
+            32134576770,
+            32166285099,
+            32146918276
+          ],
+          "samples_ts": [
+            15.933,
+            15.9173,
+            15.9269
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 807
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:39:28.000849+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:38:55Z\",\n    \"avg_ns\": 2206319327,\n    \"stddev_ns\": 3948561,\n    \"avg_ts\": 58.015298,\n    \"stddev_ts\": 0.103709,\n    \"samples_ns\": [ 2203739608, 2204354113, 2210864262 ],\n    \"samples_ts\": [ 58.0831, 58.0669, 57.8959 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:39:04Z\",\n    \"avg_ns\": 7933045660,\n    \"stddev_ns\": 8979992,\n    \"avg_ts\": 16.135053,\n    \"stddev_ts\": 0.018276,\n    \"samples_ns\": [ 7937825754, 7938624508, 7922686718 ],\n    \"samples_ts\": [ 16.1253, 16.1237, 16.1561 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:38:55Z",
+          "avg_ns": 2206319327,
+          "stddev_ns": 3948561,
+          "avg_ts": 58.015298,
+          "stddev_ts": 0.103709,
+          "samples_ns": [
+            2203739608,
+            2204354113,
+            2210864262
+          ],
+          "samples_ts": [
+            58.0831,
+            58.0669,
+            57.8959
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:39:04Z",
+          "avg_ns": 7933045660,
+          "stddev_ns": 8979992,
+          "avg_ts": 16.135053,
+          "stddev_ts": 0.018276,
+          "samples_ns": [
+            7937825754,
+            7938624508,
+            7922686718
+          ],
+          "samples_ts": [
+            16.1253,
+            16.1237,
+            16.1561
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 808
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:41:14.089380+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:39:29Z\",\n    \"avg_ns\": 2200124377,\n    \"stddev_ns\": 1000409,\n    \"avg_ts\": 58.178537,\n    \"stddev_ts\": 0.026461,\n    \"samples_ns\": [ 2200815830, 2198977252, 2200580049 ],\n    \"samples_ts\": [ 58.1603, 58.2089, 58.1665 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:39:37Z\",\n    \"avg_ns\": 32039154046,\n    \"stddev_ns\": 39638332,\n    \"avg_ts\": 15.980463,\n    \"stddev_ts\": 0.019777,\n    \"samples_ns\": [ 31996439286, 32046273440, 32074749414 ],\n    \"samples_ts\": [ 16.0018, 15.9769, 15.9627 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:39:29Z",
+          "avg_ns": 2200124377,
+          "stddev_ns": 1000409,
+          "avg_ts": 58.178537,
+          "stddev_ts": 0.026461,
+          "samples_ns": [
+            2200815830,
+            2198977252,
+            2200580049
+          ],
+          "samples_ts": [
+            58.1603,
+            58.2089,
+            58.1665
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:39:37Z",
+          "avg_ns": 32039154046,
+          "stddev_ns": 39638332,
+          "avg_ts": 15.980463,
+          "stddev_ts": 0.019777,
+          "samples_ns": [
+            31996439286,
+            32046273440,
+            32074749414
+          ],
+          "samples_ts": [
+            16.0018,
+            15.9769,
+            15.9627
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 809
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:42:14.525285+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:41:15Z\",\n    \"avg_ns\": 8887140288,\n    \"stddev_ns\": 4070362,\n    \"avg_ts\": 57.611341,\n    \"stddev_ts\": 0.026379,\n    \"samples_ns\": [ 8887049399, 8883117224, 8891254242 ],\n    \"samples_ts\": [ 57.6119, 57.6374, 57.5847 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:41:50Z\",\n    \"avg_ns\": 7917685818,\n    \"stddev_ns\": 4601880,\n    \"avg_ts\": 16.166344,\n    \"stddev_ts\": 0.009394,\n    \"samples_ns\": [ 7918666870, 7921716313, 7912674273 ],\n    \"samples_ts\": [ 16.1643, 16.1581, 16.1766 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:41:15Z",
+          "avg_ns": 8887140288,
+          "stddev_ns": 4070362,
+          "avg_ts": 57.611341,
+          "stddev_ts": 0.026379,
+          "samples_ns": [
+            8887049399,
+            8883117224,
+            8891254242
+          ],
+          "samples_ts": [
+            57.6119,
+            57.6374,
+            57.5847
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:41:50Z",
+          "avg_ns": 7917685818,
+          "stddev_ns": 4601880,
+          "avg_ts": 16.166344,
+          "stddev_ts": 0.009394,
+          "samples_ns": [
+            7918666870,
+            7921716313,
+            7912674273
+          ],
+          "samples_ts": [
+            16.1643,
+            16.1581,
+            16.1766
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 810
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:44:27.647927+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:42:15Z\",\n    \"avg_ns\": 8874619479,\n    \"stddev_ns\": 1191039,\n    \"avg_ts\": 57.692615,\n    \"stddev_ts\": 0.007718,\n    \"samples_ns\": [ 8875990388, 8873921427, 8873946623 ],\n    \"samples_ts\": [ 57.6837, 57.6972, 57.697 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:42:51Z\",\n    \"avg_ns\": 32152490010,\n    \"stddev_ns\": 8851922,\n    \"avg_ts\": 15.924117,\n    \"stddev_ts\": 0.004383,\n    \"samples_ns\": [ 32162582147, 32146041048, 32148846835 ],\n    \"samples_ts\": [ 15.9191, 15.9273, 15.9259 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:42:15Z",
+          "avg_ns": 8874619479,
+          "stddev_ns": 1191039,
+          "avg_ts": 57.692615,
+          "stddev_ts": 0.007718,
+          "samples_ns": [
+            8875990388,
+            8873921427,
+            8873946623
+          ],
+          "samples_ts": [
+            57.6837,
+            57.6972,
+            57.697
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:42:51Z",
+          "avg_ns": 32152490010,
+          "stddev_ns": 8851922,
+          "avg_ts": 15.924117,
+          "stddev_ts": 0.004383,
+          "samples_ns": [
+            32162582147,
+            32146041048,
+            32148846835
+          ],
+          "samples_ts": [
+            15.9191,
+            15.9273,
+            15.9259
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 811
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:45:01.534008+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:44:28Z\",\n    \"avg_ns\": 2192091840,\n    \"stddev_ns\": 1099606,\n    \"avg_ts\": 58.391724,\n    \"stddev_ts\": 0.029265,\n    \"samples_ns\": [ 2193166986, 2192137353, 2190971182 ],\n    \"samples_ts\": [ 58.3631, 58.3905, 58.4216 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:44:37Z\",\n    \"avg_ns\": 7980476060,\n    \"stddev_ns\": 2413209,\n    \"avg_ts\": 16.039144,\n    \"stddev_ts\": 0.004848,\n    \"samples_ns\": [ 7977692214, 7981810975, 7981924992 ],\n    \"samples_ts\": [ 16.0447, 16.0365, 16.0362 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:44:28Z",
+          "avg_ns": 2192091840,
+          "stddev_ns": 1099606,
+          "avg_ts": 58.391724,
+          "stddev_ts": 0.029265,
+          "samples_ns": [
+            2193166986,
+            2192137353,
+            2190971182
+          ],
+          "samples_ts": [
+            58.3631,
+            58.3905,
+            58.4216
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:44:37Z",
+          "avg_ns": 7980476060,
+          "stddev_ns": 2413209,
+          "avg_ts": 16.039144,
+          "stddev_ts": 0.004848,
+          "samples_ns": [
+            7977692214,
+            7981810975,
+            7981924992
+          ],
+          "samples_ts": [
+            16.0447,
+            16.0365,
+            16.0362
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 812
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:46:48.209819+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:45:02Z\",\n    \"avg_ns\": 2206223132,\n    \"stddev_ns\": 842289,\n    \"avg_ts\": 58.017709,\n    \"stddev_ts\": 0.022076,\n    \"samples_ns\": [ 2205713075, 2207192248, 2205764075 ],\n    \"samples_ts\": [ 58.0311, 57.9922, 58.0298 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:45:11Z\",\n    \"avg_ns\": 32224410116,\n    \"stddev_ns\": 3915271,\n    \"avg_ts\": 15.888576,\n    \"stddev_ts\": 0.001928,\n    \"samples_ns\": [ 32220811485, 32228572590, 32223846274 ],\n    \"samples_ts\": [ 15.8904, 15.8865, 15.8889 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:45:02Z",
+          "avg_ns": 2206223132,
+          "stddev_ns": 842289,
+          "avg_ts": 58.017709,
+          "stddev_ts": 0.022076,
+          "samples_ns": [
+            2205713075,
+            2207192248,
+            2205764075
+          ],
+          "samples_ts": [
+            58.0311,
+            57.9922,
+            58.0298
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:45:11Z",
+          "avg_ns": 32224410116,
+          "stddev_ns": 3915271,
+          "avg_ts": 15.888576,
+          "stddev_ts": 0.001928,
+          "samples_ns": [
+            32220811485,
+            32228572590,
+            32223846274
+          ],
+          "samples_ts": [
+            15.8904,
+            15.8865,
+            15.8889
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 813
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:47:49.525782+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:46:49Z\",\n    \"avg_ns\": 9056554823,\n    \"stddev_ns\": 3525718,\n    \"avg_ts\": 56.533645,\n    \"stddev_ts\": 0.022005,\n    \"samples_ns\": [ 9058562253, 9058616937, 9052485280 ],\n    \"samples_ts\": [ 56.5211, 56.5208, 56.5591 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:47:25Z\",\n    \"avg_ns\": 7968254449,\n    \"stddev_ns\": 922573,\n    \"avg_ts\": 16.063744,\n    \"stddev_ts\": 0.001860,\n    \"samples_ns\": [ 7969040874, 7967238907, 7968483566 ],\n    \"samples_ts\": [ 16.0622, 16.0658, 16.0633 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:46:49Z",
+          "avg_ns": 9056554823,
+          "stddev_ns": 3525718,
+          "avg_ts": 56.533645,
+          "stddev_ts": 0.022005,
+          "samples_ns": [
+            9058562253,
+            9058616937,
+            9052485280
+          ],
+          "samples_ts": [
+            56.5211,
+            56.5208,
+            56.5591
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:47:25Z",
+          "avg_ns": 7968254449,
+          "stddev_ns": 922573,
+          "avg_ts": 16.063744,
+          "stddev_ts": 0.00186,
+          "samples_ns": [
+            7969040874,
+            7967238907,
+            7968483566
+          ],
+          "samples_ts": [
+            16.0622,
+            16.0658,
+            16.0633
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 814
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:50:03.482469+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:47:50Z\",\n    \"avg_ns\": 9064855331,\n    \"stddev_ns\": 553702,\n    \"avg_ts\": 56.481872,\n    \"stddev_ts\": 0.003399,\n    \"samples_ns\": [ 9064225503, 9065173173, 9065167318 ],\n    \"samples_ts\": [ 56.4858, 56.4799, 56.4799 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:48:26Z\",\n    \"avg_ns\": 32174546918,\n    \"stddev_ns\": 11795932,\n    \"avg_ts\": 15.913201,\n    \"stddev_ts\": 0.005833,\n    \"samples_ns\": [ 32168214955, 32188156735, 32167269064 ],\n    \"samples_ts\": [ 15.9163, 15.9065, 15.9168 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:47:50Z",
+          "avg_ns": 9064855331,
+          "stddev_ns": 553702,
+          "avg_ts": 56.481872,
+          "stddev_ts": 0.003399,
+          "samples_ns": [
+            9064225503,
+            9065173173,
+            9065167318
+          ],
+          "samples_ts": [
+            56.4858,
+            56.4799,
+            56.4799
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:48:26Z",
+          "avg_ns": 32174546918,
+          "stddev_ns": 11795932,
+          "avg_ts": 15.913201,
+          "stddev_ts": 0.005833,
+          "samples_ns": [
+            32168214955,
+            32188156735,
+            32167269064
+          ],
+          "samples_ts": [
+            15.9163,
+            15.9065,
+            15.9168
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 815
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:50:37.244837+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:50:04Z\",\n    \"avg_ns\": 2207363133,\n    \"stddev_ns\": 2554011,\n    \"avg_ts\": 57.987792,\n    \"stddev_ts\": 0.067058,\n    \"samples_ns\": [ 2210247082, 2206455111, 2205387206 ],\n    \"samples_ts\": [ 57.9121, 58.0116, 58.0397 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:50:13Z\",\n    \"avg_ns\": 7920669864,\n    \"stddev_ns\": 15576326,\n    \"avg_ts\": 16.160291,\n    \"stddev_ts\": 0.031745,\n    \"samples_ns\": [ 7910252616, 7913181059, 7938575918 ],\n    \"samples_ts\": [ 16.1815, 16.1755, 16.1238 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:50:04Z",
+          "avg_ns": 2207363133,
+          "stddev_ns": 2554011,
+          "avg_ts": 57.987792,
+          "stddev_ts": 0.067058,
+          "samples_ns": [
+            2210247082,
+            2206455111,
+            2205387206
+          ],
+          "samples_ts": [
+            57.9121,
+            58.0116,
+            58.0397
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:50:13Z",
+          "avg_ns": 7920669864,
+          "stddev_ns": 15576326,
+          "avg_ts": 16.160291,
+          "stddev_ts": 0.031745,
+          "samples_ns": [
+            7910252616,
+            7913181059,
+            7938575918
+          ],
+          "samples_ts": [
+            16.1815,
+            16.1755,
+            16.1238
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 816
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:52:23.915565+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:50:38Z\",\n    \"avg_ns\": 2204914212,\n    \"stddev_ns\": 1387411,\n    \"avg_ts\": 58.052161,\n    \"stddev_ts\": 0.036517,\n    \"samples_ns\": [ 2203861639, 2204394564, 2206486433 ],\n    \"samples_ts\": [ 58.0799, 58.0658, 58.0108 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:50:47Z\",\n    \"avg_ns\": 32211253157,\n    \"stddev_ns\": 16173918,\n    \"avg_ts\": 15.895069,\n    \"stddev_ts\": 0.007983,\n    \"samples_ns\": [ 32218859594, 32222220519, 32192679359 ],\n    \"samples_ts\": [ 15.8913, 15.8897, 15.9042 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:50:38Z",
+          "avg_ns": 2204914212,
+          "stddev_ns": 1387411,
+          "avg_ts": 58.052161,
+          "stddev_ts": 0.036517,
+          "samples_ns": [
+            2203861639,
+            2204394564,
+            2206486433
+          ],
+          "samples_ts": [
+            58.0799,
+            58.0658,
+            58.0108
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:50:47Z",
+          "avg_ns": 32211253157,
+          "stddev_ns": 16173918,
+          "avg_ts": 15.895069,
+          "stddev_ts": 0.007983,
+          "samples_ns": [
+            32218859594,
+            32222220519,
+            32192679359
+          ],
+          "samples_ts": [
+            15.8913,
+            15.8897,
+            15.9042
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 817
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:53:24.375759+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:52:24Z\",\n    \"avg_ns\": 8862923950,\n    \"stddev_ns\": 2755253,\n    \"avg_ts\": 57.768749,\n    \"stddev_ts\": 0.017947,\n    \"samples_ns\": [ 8865930021, 8862318253, 8860523577 ],\n    \"samples_ts\": [ 57.7492, 57.7727, 57.7844 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:53:00Z\",\n    \"avg_ns\": 7942695913,\n    \"stddev_ns\": 729509,\n    \"avg_ts\": 16.115435,\n    \"stddev_ts\": 0.001469,\n    \"samples_ns\": [ 7941880319, 7943262913, 7942944508 ],\n    \"samples_ts\": [ 16.1171, 16.1143, 16.1149 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:52:24Z",
+          "avg_ns": 8862923950,
+          "stddev_ns": 2755253,
+          "avg_ts": 57.768749,
+          "stddev_ts": 0.017947,
+          "samples_ns": [
+            8865930021,
+            8862318253,
+            8860523577
+          ],
+          "samples_ts": [
+            57.7492,
+            57.7727,
+            57.7844
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:53:00Z",
+          "avg_ns": 7942695913,
+          "stddev_ns": 729509,
+          "avg_ts": 16.115435,
+          "stddev_ts": 0.001469,
+          "samples_ns": [
+            7941880319,
+            7943262913,
+            7942944508
+          ],
+          "samples_ts": [
+            16.1171,
+            16.1143,
+            16.1149
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 818
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:55:37.419629+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:53:25Z\",\n    \"avg_ns\": 8868358545,\n    \"stddev_ns\": 1984618,\n    \"avg_ts\": 57.733347,\n    \"stddev_ts\": 0.012904,\n    \"samples_ns\": [ 8866710234, 8870558253, 8867807149 ],\n    \"samples_ts\": [ 57.7441, 57.719, 57.7369 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:54:00Z\",\n    \"avg_ns\": 32132375647,\n    \"stddev_ns\": 9259148,\n    \"avg_ts\": 15.934086,\n    \"stddev_ts\": 0.004590,\n    \"samples_ns\": [ 32131619769, 32123519347, 32141987826 ],\n    \"samples_ts\": [ 15.9345, 15.9385, 15.9293 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:53:25Z",
+          "avg_ns": 8868358545,
+          "stddev_ns": 1984618,
+          "avg_ts": 57.733347,
+          "stddev_ts": 0.012904,
+          "samples_ns": [
+            8866710234,
+            8870558253,
+            8867807149
+          ],
+          "samples_ts": [
+            57.7441,
+            57.719,
+            57.7369
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:54:00Z",
+          "avg_ns": 32132375647,
+          "stddev_ns": 9259148,
+          "avg_ts": 15.934086,
+          "stddev_ts": 0.00459,
+          "samples_ns": [
+            32131619769,
+            32123519347,
+            32141987826
+          ],
+          "samples_ts": [
+            15.9345,
+            15.9385,
+            15.9293
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 819
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:56:11.262173+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:55:38Z\",\n    \"avg_ns\": 2199540306,\n    \"stddev_ns\": 6447929,\n    \"avg_ts\": 58.194311,\n    \"stddev_ts\": 0.170339,\n    \"samples_ns\": [ 2206867753, 2197017985, 2194735182 ],\n    \"samples_ts\": [ 58.0008, 58.2608, 58.3214 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:55:47Z\",\n    \"avg_ns\": 7954063991,\n    \"stddev_ns\": 16957834,\n    \"avg_ts\": 16.092451,\n    \"stddev_ts\": 0.034274,\n    \"samples_ns\": [ 7973245693, 7947881201, 7941065079 ],\n    \"samples_ts\": [ 16.0537, 16.1049, 16.1187 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:55:38Z",
+          "avg_ns": 2199540306,
+          "stddev_ns": 6447929,
+          "avg_ts": 58.194311,
+          "stddev_ts": 0.170339,
+          "samples_ns": [
+            2206867753,
+            2197017985,
+            2194735182
+          ],
+          "samples_ts": [
+            58.0008,
+            58.2608,
+            58.3214
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:55:47Z",
+          "avg_ns": 7954063991,
+          "stddev_ns": 16957834,
+          "avg_ts": 16.092451,
+          "stddev_ts": 0.034274,
+          "samples_ns": [
+            7973245693,
+            7947881201,
+            7941065079
+          ],
+          "samples_ts": [
+            16.0537,
+            16.1049,
+            16.1187
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 820
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:57:57.640426+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:56:12Z\",\n    \"avg_ns\": 2197882562,\n    \"stddev_ns\": 9548257,\n    \"avg_ts\": 58.238601,\n    \"stddev_ts\": 0.252370,\n    \"samples_ns\": [ 2208906767, 2192501657, 2192239263 ],\n    \"samples_ts\": [ 57.9472, 58.3808, 58.3878 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:56:21Z\",\n    \"avg_ns\": 32131257628,\n    \"stddev_ns\": 19765653,\n    \"avg_ts\": 15.934643,\n    \"stddev_ts\": 0.009799,\n    \"samples_ns\": [ 32114217167, 32152924487, 32126631232 ],\n    \"samples_ts\": [ 15.9431, 15.9239, 15.9369 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:56:12Z",
+          "avg_ns": 2197882562,
+          "stddev_ns": 9548257,
+          "avg_ts": 58.238601,
+          "stddev_ts": 0.25237,
+          "samples_ns": [
+            2208906767,
+            2192501657,
+            2192239263
+          ],
+          "samples_ts": [
+            57.9472,
+            58.3808,
+            58.3878
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:56:21Z",
+          "avg_ns": 32131257628,
+          "stddev_ns": 19765653,
+          "avg_ts": 15.934643,
+          "stddev_ts": 0.009799,
+          "samples_ns": [
+            32114217167,
+            32152924487,
+            32126631232
+          ],
+          "samples_ts": [
+            15.9431,
+            15.9239,
+            15.9369
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 821
+    },
+    {
+      "timestamp_utc": "2025-12-09T14:58:58.107235+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:57:58Z\",\n    \"avg_ns\": 8850827120,\n    \"stddev_ns\": 37769064,\n    \"avg_ts\": 57.848405,\n    \"stddev_ts\": 0.247455,\n    \"samples_ns\": [ 8807292304, 8870351042, 8874838015 ],\n    \"samples_ts\": [ 58.1336, 57.7204, 57.6912 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:58:34Z\",\n    \"avg_ns\": 7964399316,\n    \"stddev_ns\": 2664830,\n    \"avg_ts\": 16.071521,\n    \"stddev_ts\": 0.005376,\n    \"samples_ns\": [ 7962864753, 7962856801, 7967476394 ],\n    \"samples_ts\": [ 16.0746, 16.0746, 16.0653 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:57:58Z",
+          "avg_ns": 8850827120,
+          "stddev_ns": 37769064,
+          "avg_ts": 57.848405,
+          "stddev_ts": 0.247455,
+          "samples_ns": [
+            8807292304,
+            8870351042,
+            8874838015
+          ],
+          "samples_ts": [
+            58.1336,
+            57.7204,
+            57.6912
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:58:34Z",
+          "avg_ns": 7964399316,
+          "stddev_ns": 2664830,
+          "avg_ts": 16.071521,
+          "stddev_ts": 0.005376,
+          "samples_ns": [
+            7962864753,
+            7962856801,
+            7967476394
+          ],
+          "samples_ts": [
+            16.0746,
+            16.0746,
+            16.0653
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 822
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:01:11.193696+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:58:59Z\",\n    \"avg_ns\": 8870273163,\n    \"stddev_ns\": 1939515,\n    \"avg_ts\": 57.720885,\n    \"stddev_ts\": 0.012619,\n    \"samples_ns\": [ 8872491228, 8869432203, 8868896058 ],\n    \"samples_ts\": [ 57.7065, 57.7264, 57.7298 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T14:59:34Z\",\n    \"avg_ns\": 32140400146,\n    \"stddev_ns\": 19713216,\n    \"avg_ts\": 15.930111,\n    \"stddev_ts\": 0.009767,\n    \"samples_ns\": [ 32162838961, 32132490339, 32125871139 ],\n    \"samples_ts\": [ 15.919, 15.934, 15.9373 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:58:59Z",
+          "avg_ns": 8870273163,
+          "stddev_ns": 1939515,
+          "avg_ts": 57.720885,
+          "stddev_ts": 0.012619,
+          "samples_ns": [
+            8872491228,
+            8869432203,
+            8868896058
+          ],
+          "samples_ts": [
+            57.7065,
+            57.7264,
+            57.7298
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T14:59:34Z",
+          "avg_ns": 32140400146,
+          "stddev_ns": 19713216,
+          "avg_ts": 15.930111,
+          "stddev_ts": 0.009767,
+          "samples_ns": [
+            32162838961,
+            32132490339,
+            32125871139
+          ],
+          "samples_ts": [
+            15.919,
+            15.934,
+            15.9373
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 823
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:01:45.039678+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:01:12Z\",\n    \"avg_ns\": 2206749735,\n    \"stddev_ns\": 663002,\n    \"avg_ts\": 58.003862,\n    \"stddev_ts\": 0.017383,\n    \"samples_ns\": [ 2207433478, 2206702377, 2206113351 ],\n    \"samples_ts\": [ 57.9859, 58.0051, 58.0206 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:01:21Z\",\n    \"avg_ns\": 7955544734,\n    \"stddev_ns\": 1862512,\n    \"avg_ts\": 16.089408,\n    \"stddev_ts\": 0.003758,\n    \"samples_ns\": [ 7957059466, 7953471226, 7956103512 ],\n    \"samples_ts\": [ 16.0863, 16.0936, 16.0883 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:01:12Z",
+          "avg_ns": 2206749735,
+          "stddev_ns": 663002,
+          "avg_ts": 58.003862,
+          "stddev_ts": 0.017383,
+          "samples_ns": [
+            2207433478,
+            2206702377,
+            2206113351
+          ],
+          "samples_ts": [
+            57.9859,
+            58.0051,
+            58.0206
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:01:21Z",
+          "avg_ns": 7955544734,
+          "stddev_ns": 1862512,
+          "avg_ts": 16.089408,
+          "stddev_ts": 0.003758,
+          "samples_ns": [
+            7957059466,
+            7953471226,
+            7956103512
+          ],
+          "samples_ts": [
+            16.0863,
+            16.0936,
+            16.0883
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 824
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:03:30.964879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:01:46Z\",\n    \"avg_ns\": 2199111805,\n    \"stddev_ns\": 6883139,\n    \"avg_ts\": 58.205697,\n    \"stddev_ts\": 0.181977,\n    \"samples_ns\": [ 2193285373, 2197343448, 2206706594 ],\n    \"samples_ts\": [ 58.3599, 58.2522, 58.005 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:01:54Z\",\n    \"avg_ns\": 31983552584,\n    \"stddev_ns\": 19976009,\n    \"avg_ts\": 16.008232,\n    \"stddev_ts\": 0.009994,\n    \"samples_ns\": [ 32006434822, 31969605597, 31974617335 ],\n    \"samples_ts\": [ 15.9968, 16.0152, 16.0127 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:01:46Z",
+          "avg_ns": 2199111805,
+          "stddev_ns": 6883139,
+          "avg_ts": 58.205697,
+          "stddev_ts": 0.181977,
+          "samples_ns": [
+            2193285373,
+            2197343448,
+            2206706594
+          ],
+          "samples_ts": [
+            58.3599,
+            58.2522,
+            58.005
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:01:54Z",
+          "avg_ns": 31983552584,
+          "stddev_ns": 19976009,
+          "avg_ts": 16.008232,
+          "stddev_ts": 0.009994,
+          "samples_ns": [
+            32006434822,
+            31969605597,
+            31974617335
+          ],
+          "samples_ts": [
+            15.9968,
+            16.0152,
+            16.0127
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 825
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:04:32.163321+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:03:32Z\",\n    \"avg_ns\": 9052716977,\n    \"stddev_ns\": 18769676,\n    \"avg_ts\": 56.557768,\n    \"stddev_ts\": 0.117400,\n    \"samples_ns\": [ 9031093032, 9064788472, 9062269429 ],\n    \"samples_ts\": [ 56.693, 56.4823, 56.498 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:04:08Z\",\n    \"avg_ns\": 7940283145,\n    \"stddev_ns\": 6764130,\n    \"avg_ts\": 16.120340,\n    \"stddev_ts\": 0.013729,\n    \"samples_ns\": [ 7947205418, 7933691643, 7939952376 ],\n    \"samples_ts\": [ 16.1063, 16.1337, 16.121 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:03:32Z",
+          "avg_ns": 9052716977,
+          "stddev_ns": 18769676,
+          "avg_ts": 56.557768,
+          "stddev_ts": 0.1174,
+          "samples_ns": [
+            9031093032,
+            9064788472,
+            9062269429
+          ],
+          "samples_ts": [
+            56.693,
+            56.4823,
+            56.498
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:04:08Z",
+          "avg_ns": 7940283145,
+          "stddev_ns": 6764130,
+          "avg_ts": 16.12034,
+          "stddev_ts": 0.013729,
+          "samples_ns": [
+            7947205418,
+            7933691643,
+            7939952376
+          ],
+          "samples_ts": [
+            16.1063,
+            16.1337,
+            16.121
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 826
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:06:45.955742+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:04:33Z\",\n    \"avg_ns\": 9063638831,\n    \"stddev_ns\": 4964418,\n    \"avg_ts\": 56.489464,\n    \"stddev_ts\": 0.030943,\n    \"samples_ns\": [ 9065462267, 9058021511, 9067432716 ],\n    \"samples_ts\": [ 56.4781, 56.5245, 56.4658 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:05:09Z\",\n    \"avg_ns\": 32119277741,\n    \"stddev_ns\": 14906679,\n    \"avg_ts\": 15.940585,\n    \"stddev_ts\": 0.007400,\n    \"samples_ns\": [ 32102160504, 32126268164, 32129404555 ],\n    \"samples_ts\": [ 15.9491, 15.9371, 15.9356 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:04:33Z",
+          "avg_ns": 9063638831,
+          "stddev_ns": 4964418,
+          "avg_ts": 56.489464,
+          "stddev_ts": 0.030943,
+          "samples_ns": [
+            9065462267,
+            9058021511,
+            9067432716
+          ],
+          "samples_ts": [
+            56.4781,
+            56.5245,
+            56.4658
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:05:09Z",
+          "avg_ns": 32119277741,
+          "stddev_ns": 14906679,
+          "avg_ts": 15.940585,
+          "stddev_ts": 0.0074,
+          "samples_ns": [
+            32102160504,
+            32126268164,
+            32129404555
+          ],
+          "samples_ts": [
+            15.9491,
+            15.9371,
+            15.9356
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 827
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:07:19.352720+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:06:46Z\",\n    \"avg_ns\": 1708545437,\n    \"stddev_ns\": 6486439,\n    \"avg_ts\": 74.918249,\n    \"stddev_ts\": 0.285038,\n    \"samples_ns\": [ 1701055914, 1712317598, 1712262801 ],\n    \"samples_ts\": [ 75.2474, 74.7525, 74.7549 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:06:53Z\",\n    \"avg_ns\": 8459935858,\n    \"stddev_ns\": 35477673,\n    \"avg_ts\": 15.130315,\n    \"stddev_ts\": 0.063337,\n    \"samples_ns\": [ 8448711137, 8499667900, 8431428539 ],\n    \"samples_ts\": [ 15.1502, 15.0594, 15.1813 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:06:46Z",
+          "avg_ns": 1708545437,
+          "stddev_ns": 6486439,
+          "avg_ts": 74.918249,
+          "stddev_ts": 0.285038,
+          "samples_ns": [
+            1701055914,
+            1712317598,
+            1712262801
+          ],
+          "samples_ts": [
+            75.2474,
+            74.7525,
+            74.7549
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:06:53Z",
+          "avg_ns": 8459935858,
+          "stddev_ns": 35477673,
+          "avg_ts": 15.130315,
+          "stddev_ts": 0.063337,
+          "samples_ns": [
+            8448711137,
+            8499667900,
+            8431428539
+          ],
+          "samples_ts": [
+            15.1502,
+            15.0594,
+            15.1813
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 828
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:09:11.384357+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:07:20Z\",\n    \"avg_ns\": 1709823490,\n    \"stddev_ns\": 7346461,\n    \"avg_ts\": 74.862450,\n    \"stddev_ts\": 0.321784,\n    \"samples_ns\": [ 1702256510, 1710286680, 1716927281 ],\n    \"samples_ts\": [ 75.1943, 74.8413, 74.5518 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:07:27Z\",\n    \"avg_ns\": 34670139095,\n    \"stddev_ns\": 765846666,\n    \"avg_ts\": 14.772518,\n    \"stddev_ts\": 0.323689,\n    \"samples_ns\": [ 34028298644, 34464227522, 35517891119 ],\n    \"samples_ts\": [ 15.0463, 14.856, 14.4153 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:07:20Z",
+          "avg_ns": 1709823490,
+          "stddev_ns": 7346461,
+          "avg_ts": 74.86245,
+          "stddev_ts": 0.321784,
+          "samples_ns": [
+            1702256510,
+            1710286680,
+            1716927281
+          ],
+          "samples_ts": [
+            75.1943,
+            74.8413,
+            74.5518
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:07:27Z",
+          "avg_ns": 34670139095,
+          "stddev_ns": 765846666,
+          "avg_ts": 14.772518,
+          "stddev_ts": 0.323689,
+          "samples_ns": [
+            34028298644,
+            34464227522,
+            35517891119
+          ],
+          "samples_ts": [
+            15.0463,
+            14.856,
+            14.4153
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 829
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:10:05.915878+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:09:12Z\",\n    \"avg_ns\": 6824029725,\n    \"stddev_ns\": 5163992,\n    \"avg_ts\": 75.029010,\n    \"stddev_ts\": 0.056753,\n    \"samples_ns\": [ 6820819378, 6829986573, 6821283224 ],\n    \"samples_ts\": [ 75.0643, 74.9635, 75.0592 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:09:39Z\",\n    \"avg_ns\": 8674154510,\n    \"stddev_ns\": 93651322,\n    \"avg_ts\": 14.757633,\n    \"stddev_ts\": 0.159999,\n    \"samples_ns\": [ 8570280811, 8700048246, 8752134475 ],\n    \"samples_ts\": [ 14.9353, 14.7126, 14.625 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:09:12Z",
+          "avg_ns": 6824029725,
+          "stddev_ns": 5163992,
+          "avg_ts": 75.02901,
+          "stddev_ts": 0.056753,
+          "samples_ns": [
+            6820819378,
+            6829986573,
+            6821283224
+          ],
+          "samples_ts": [
+            75.0643,
+            74.9635,
+            75.0592
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:09:39Z",
+          "avg_ns": 8674154510,
+          "stddev_ns": 93651322,
+          "avg_ts": 14.757633,
+          "stddev_ts": 0.159999,
+          "samples_ns": [
+            8570280811,
+            8700048246,
+            8752134475
+          ],
+          "samples_ts": [
+            14.9353,
+            14.7126,
+            14.625
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 830
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:12:20.822813+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:10:06Z\",\n    \"avg_ns\": 6819554953,\n    \"stddev_ns\": 3277187,\n    \"avg_ts\": 75.078225,\n    \"stddev_ts\": 0.036087,\n    \"samples_ns\": [ 6822182441, 6815882780, 6820599638 ],\n    \"samples_ts\": [ 75.0493, 75.1187, 75.0667 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:10:34Z\",\n    \"avg_ns\": 35462486002,\n    \"stddev_ns\": 98461181,\n    \"avg_ts\": 14.437866,\n    \"stddev_ts\": 0.040135,\n    \"samples_ns\": [ 35352101127, 35541257852, 35494099027 ],\n    \"samples_ts\": [ 14.4829, 14.4058, 14.4249 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:10:06Z",
+          "avg_ns": 6819554953,
+          "stddev_ns": 3277187,
+          "avg_ts": 75.078225,
+          "stddev_ts": 0.036087,
+          "samples_ns": [
+            6822182441,
+            6815882780,
+            6820599638
+          ],
+          "samples_ts": [
+            75.0493,
+            75.1187,
+            75.0667
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:10:34Z",
+          "avg_ns": 35462486002,
+          "stddev_ns": 98461181,
+          "avg_ts": 14.437866,
+          "stddev_ts": 0.040135,
+          "samples_ns": [
+            35352101127,
+            35541257852,
+            35494099027
+          ],
+          "samples_ts": [
+            14.4829,
+            14.4058,
+            14.4249
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 831
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:12:54.912093+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:12:21Z\",\n    \"avg_ns\": 1707706089,\n    \"stddev_ns\": 2840042,\n    \"avg_ts\": 74.954488,\n    \"stddev_ts\": 0.124540,\n    \"samples_ns\": [ 1705819488, 1710972404, 1706326375 ],\n    \"samples_ts\": [ 75.0372, 74.8113, 75.015 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:12:28Z\",\n    \"avg_ns\": 8680703412,\n    \"stddev_ns\": 17412042,\n    \"avg_ts\": 14.745388,\n    \"stddev_ts\": 0.029607,\n    \"samples_ns\": [ 8660818177, 8688076280, 8693215780 ],\n    \"samples_ts\": [ 14.7792, 14.7328, 14.7241 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:12:21Z",
+          "avg_ns": 1707706089,
+          "stddev_ns": 2840042,
+          "avg_ts": 74.954488,
+          "stddev_ts": 0.12454,
+          "samples_ns": [
+            1705819488,
+            1710972404,
+            1706326375
+          ],
+          "samples_ts": [
+            75.0372,
+            74.8113,
+            75.015
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:12:28Z",
+          "avg_ns": 8680703412,
+          "stddev_ns": 17412042,
+          "avg_ts": 14.745388,
+          "stddev_ts": 0.029607,
+          "samples_ns": [
+            8660818177,
+            8688076280,
+            8693215780
+          ],
+          "samples_ts": [
+            14.7792,
+            14.7328,
+            14.7241
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 832
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:14:49.578137+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:12:55Z\",\n    \"avg_ns\": 1700886500,\n    \"stddev_ns\": 1407728,\n    \"avg_ts\": 75.254909,\n    \"stddev_ts\": 0.062257,\n    \"samples_ns\": [ 1700273336, 1699889348, 1702496816 ],\n    \"samples_ts\": [ 75.282, 75.299, 75.1837 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:13:02Z\",\n    \"avg_ns\": 35553677618,\n    \"stddev_ns\": 32417514,\n    \"avg_ts\": 14.400769,\n    \"stddev_ts\": 0.013125,\n    \"samples_ns\": [ 35527765926, 35590027862, 35543239067 ],\n    \"samples_ts\": [ 14.4113, 14.3861, 14.405 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:12:55Z",
+          "avg_ns": 1700886500,
+          "stddev_ns": 1407728,
+          "avg_ts": 75.254909,
+          "stddev_ts": 0.062257,
+          "samples_ns": [
+            1700273336,
+            1699889348,
+            1702496816
+          ],
+          "samples_ts": [
+            75.282,
+            75.299,
+            75.1837
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:13:02Z",
+          "avg_ns": 35553677618,
+          "stddev_ns": 32417514,
+          "avg_ts": 14.400769,
+          "stddev_ts": 0.013125,
+          "samples_ns": [
+            35527765926,
+            35590027862,
+            35543239067
+          ],
+          "samples_ts": [
+            14.4113,
+            14.3861,
+            14.405
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 833
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:15:44.536319+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:14:50Z\",\n    \"avg_ns\": 6889686643,\n    \"stddev_ns\": 8778598,\n    \"avg_ts\": 74.314055,\n    \"stddev_ts\": 0.094716,\n    \"samples_ns\": [ 6891503592, 6897413795, 6880142544 ],\n    \"samples_ts\": [ 74.2944, 74.2307, 74.4171 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:15:18Z\",\n    \"avg_ns\": 8745137693,\n    \"stddev_ns\": 39373071,\n    \"avg_ts\": 14.636903,\n    \"stddev_ts\": 0.066071,\n    \"samples_ns\": [ 8699692781, 8766716140, 8769004158 ],\n    \"samples_ts\": [ 14.7132, 14.6007, 14.5969 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:14:50Z",
+          "avg_ns": 6889686643,
+          "stddev_ns": 8778598,
+          "avg_ts": 74.314055,
+          "stddev_ts": 0.094716,
+          "samples_ns": [
+            6891503592,
+            6897413795,
+            6880142544
+          ],
+          "samples_ts": [
+            74.2944,
+            74.2307,
+            74.4171
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:15:18Z",
+          "avg_ns": 8745137693,
+          "stddev_ns": 39373071,
+          "avg_ts": 14.636903,
+          "stddev_ts": 0.066071,
+          "samples_ns": [
+            8699692781,
+            8766716140,
+            8769004158
+          ],
+          "samples_ts": [
+            14.7132,
+            14.6007,
+            14.5969
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 834
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:17:59.733027+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:15:45Z\",\n    \"avg_ns\": 6889053862,\n    \"stddev_ns\": 12100108,\n    \"avg_ts\": 74.320954,\n    \"stddev_ts\": 0.130409,\n    \"samples_ns\": [ 6880944630, 6883255256, 6902961701 ],\n    \"samples_ts\": [ 74.4084, 74.3834, 74.1711 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:16:13Z\",\n    \"avg_ns\": 35467942207,\n    \"stddev_ns\": 55625576,\n    \"avg_ts\": 14.435595,\n    \"stddev_ts\": 0.022660,\n    \"samples_ns\": [ 35497030521, 35403804338, 35502991764 ],\n    \"samples_ts\": [ 14.4237, 14.4617, 14.4213 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:15:45Z",
+          "avg_ns": 6889053862,
+          "stddev_ns": 12100108,
+          "avg_ts": 74.320954,
+          "stddev_ts": 0.130409,
+          "samples_ns": [
+            6880944630,
+            6883255256,
+            6902961701
+          ],
+          "samples_ts": [
+            74.4084,
+            74.3834,
+            74.1711
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:16:13Z",
+          "avg_ns": 35467942207,
+          "stddev_ns": 55625576,
+          "avg_ts": 14.435595,
+          "stddev_ts": 0.02266,
+          "samples_ns": [
+            35497030521,
+            35403804338,
+            35502991764
+          ],
+          "samples_ts": [
+            14.4237,
+            14.4617,
+            14.4213
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 835
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:18:34.021672+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:18:00Z\",\n    \"avg_ns\": 1702347713,\n    \"stddev_ns\": 2134636,\n    \"avg_ts\": 75.190358,\n    \"stddev_ts\": 0.094317,\n    \"samples_ns\": [ 1703549060, 1703610060, 1699884021 ],\n    \"samples_ts\": [ 75.1373, 75.1346, 75.2993 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:18:07Z\",\n    \"avg_ns\": 8753398209,\n    \"stddev_ns\": 17578950,\n    \"avg_ts\": 14.622932,\n    \"stddev_ts\": 0.029344,\n    \"samples_ns\": [ 8738509376, 8772790177, 8748895076 ],\n    \"samples_ts\": [ 14.6478, 14.5906, 14.6304 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:18:00Z",
+          "avg_ns": 1702347713,
+          "stddev_ns": 2134636,
+          "avg_ts": 75.190358,
+          "stddev_ts": 0.094317,
+          "samples_ns": [
+            1703549060,
+            1703610060,
+            1699884021
+          ],
+          "samples_ts": [
+            75.1373,
+            75.1346,
+            75.2993
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:18:07Z",
+          "avg_ns": 8753398209,
+          "stddev_ns": 17578950,
+          "avg_ts": 14.622932,
+          "stddev_ts": 0.029344,
+          "samples_ns": [
+            8738509376,
+            8772790177,
+            8748895076
+          ],
+          "samples_ts": [
+            14.6478,
+            14.5906,
+            14.6304
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 836
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:20:28.527717+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:18:35Z\",\n    \"avg_ns\": 1696548383,\n    \"stddev_ns\": 2455689,\n    \"avg_ts\": 75.447408,\n    \"stddev_ts\": 0.109103,\n    \"samples_ns\": [ 1694957976, 1699376238, 1695310936 ],\n    \"samples_ts\": [ 75.5181, 75.3218, 75.5024 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:18:41Z\",\n    \"avg_ns\": 35489390189,\n    \"stddev_ns\": 19287806,\n    \"avg_ts\": 14.426850,\n    \"stddev_ts\": 0.007842,\n    \"samples_ns\": [ 35468055513, 35494528318, 35505586738 ],\n    \"samples_ts\": [ 14.4355, 14.4248, 14.4203 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:18:35Z",
+          "avg_ns": 1696548383,
+          "stddev_ns": 2455689,
+          "avg_ts": 75.447408,
+          "stddev_ts": 0.109103,
+          "samples_ns": [
+            1694957976,
+            1699376238,
+            1695310936
+          ],
+          "samples_ts": [
+            75.5181,
+            75.3218,
+            75.5024
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:18:41Z",
+          "avg_ns": 35489390189,
+          "stddev_ns": 19287806,
+          "avg_ts": 14.42685,
+          "stddev_ts": 0.007842,
+          "samples_ns": [
+            35468055513,
+            35494528318,
+            35505586738
+          ],
+          "samples_ts": [
+            14.4355,
+            14.4248,
+            14.4203
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 837
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:21:24.299040+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:20:29Z\",\n    \"avg_ns\": 7054892944,\n    \"stddev_ns\": 4546066,\n    \"avg_ts\": 72.573765,\n    \"stddev_ts\": 0.046758,\n    \"samples_ns\": [ 7059788416, 7054086088, 7050804328 ],\n    \"samples_ts\": [ 72.5234, 72.582, 72.6158 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:20:57Z\",\n    \"avg_ns\": 8780035296,\n    \"stddev_ns\": 12031731,\n    \"avg_ts\": 14.578547,\n    \"stddev_ts\": 0.019978,\n    \"samples_ns\": [ 8767933879, 8791996109, 8780175900 ],\n    \"samples_ts\": [ 14.5987, 14.5587, 14.5783 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:20:29Z",
+          "avg_ns": 7054892944,
+          "stddev_ns": 4546066,
+          "avg_ts": 72.573765,
+          "stddev_ts": 0.046758,
+          "samples_ns": [
+            7059788416,
+            7054086088,
+            7050804328
+          ],
+          "samples_ts": [
+            72.5234,
+            72.582,
+            72.6158
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:20:57Z",
+          "avg_ns": 8780035296,
+          "stddev_ns": 12031731,
+          "avg_ts": 14.578547,
+          "stddev_ts": 0.019978,
+          "samples_ns": [
+            8767933879,
+            8791996109,
+            8780175900
+          ],
+          "samples_ts": [
+            14.5987,
+            14.5587,
+            14.5783
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 838
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:23:40.430583+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:21:25Z\",\n    \"avg_ns\": 7058185451,\n    \"stddev_ns\": 5499947,\n    \"avg_ts\": 72.539920,\n    \"stddev_ts\": 0.056536,\n    \"samples_ns\": [ 7051869017, 7060784833, 7061902505 ],\n    \"samples_ts\": [ 72.6049, 72.5132, 72.5017 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:21:53Z\",\n    \"avg_ns\": 35569491681,\n    \"stddev_ns\": 66244954,\n    \"avg_ts\": 14.394391,\n    \"stddev_ts\": 0.026807,\n    \"samples_ns\": [ 35568527688, 35503734522, 35636212835 ],\n    \"samples_ts\": [ 14.3947, 14.421, 14.3674 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:21:25Z",
+          "avg_ns": 7058185451,
+          "stddev_ns": 5499947,
+          "avg_ts": 72.53992,
+          "stddev_ts": 0.056536,
+          "samples_ns": [
+            7051869017,
+            7060784833,
+            7061902505
+          ],
+          "samples_ts": [
+            72.6049,
+            72.5132,
+            72.5017
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:21:53Z",
+          "avg_ns": 35569491681,
+          "stddev_ns": 66244954,
+          "avg_ts": 14.394391,
+          "stddev_ts": 0.026807,
+          "samples_ns": [
+            35568527688,
+            35503734522,
+            35636212835
+          ],
+          "samples_ts": [
+            14.3947,
+            14.421,
+            14.3674
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 839
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:24:14.814583+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:23:41Z\",\n    \"avg_ns\": 1702231426,\n    \"stddev_ns\": 1594410,\n    \"avg_ts\": 75.195460,\n    \"stddev_ts\": 0.070395,\n    \"samples_ns\": [ 1701245089, 1701378303, 1704070886 ],\n    \"samples_ts\": [ 75.239, 75.2331, 75.1142 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:23:48Z\",\n    \"avg_ns\": 8793506498,\n    \"stddev_ns\": 66579516,\n    \"avg_ts\": 14.556754,\n    \"stddev_ts\": 0.110629,\n    \"samples_ns\": [ 8717915660, 8843440375, 8819163460 ],\n    \"samples_ts\": [ 14.6824, 14.474, 14.5138 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:23:41Z",
+          "avg_ns": 1702231426,
+          "stddev_ns": 1594410,
+          "avg_ts": 75.19546,
+          "stddev_ts": 0.070395,
+          "samples_ns": [
+            1701245089,
+            1701378303,
+            1704070886
+          ],
+          "samples_ts": [
+            75.239,
+            75.2331,
+            75.1142
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:23:48Z",
+          "avg_ns": 8793506498,
+          "stddev_ns": 66579516,
+          "avg_ts": 14.556754,
+          "stddev_ts": 0.110629,
+          "samples_ns": [
+            8717915660,
+            8843440375,
+            8819163460
+          ],
+          "samples_ts": [
+            14.6824,
+            14.474,
+            14.5138
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 840
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:26:09.173539+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:24:15Z\",\n    \"avg_ns\": 1701070518,\n    \"stddev_ns\": 4956914,\n    \"avg_ts\": 75.247160,\n    \"stddev_ts\": 0.219360,\n    \"samples_ns\": [ 1701536827, 1695896926, 1705777801 ],\n    \"samples_ts\": [ 75.2261, 75.4763, 75.0391 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:24:22Z\",\n    \"avg_ns\": 35452441137,\n    \"stddev_ns\": 42535663,\n    \"avg_ts\": 14.441897,\n    \"stddev_ts\": 0.017328,\n    \"samples_ns\": [ 35452722467, 35494835438, 35409765506 ],\n    \"samples_ts\": [ 14.4418, 14.4246, 14.4593 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:24:15Z",
+          "avg_ns": 1701070518,
+          "stddev_ns": 4956914,
+          "avg_ts": 75.24716,
+          "stddev_ts": 0.21936,
+          "samples_ns": [
+            1701536827,
+            1695896926,
+            1705777801
+          ],
+          "samples_ts": [
+            75.2261,
+            75.4763,
+            75.0391
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:24:22Z",
+          "avg_ns": 35452441137,
+          "stddev_ns": 42535663,
+          "avg_ts": 14.441897,
+          "stddev_ts": 0.017328,
+          "samples_ns": [
+            35452722467,
+            35494835438,
+            35409765506
+          ],
+          "samples_ts": [
+            14.4418,
+            14.4246,
+            14.4593
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 841
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:27:03.843710+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:26:10Z\",\n    \"avg_ns\": 6824240094,\n    \"stddev_ns\": 7715357,\n    \"avg_ts\": 75.026733,\n    \"stddev_ts\": 0.084840,\n    \"samples_ns\": [ 6816112870, 6825143341, 6831464071 ],\n    \"samples_ts\": [ 75.1161, 75.0167, 74.9473 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:26:37Z\",\n    \"avg_ns\": 8731180353,\n    \"stddev_ns\": 40507912,\n    \"avg_ts\": 14.660314,\n    \"stddev_ts\": 0.068180,\n    \"samples_ns\": [ 8684935677, 8760382448, 8748222934 ],\n    \"samples_ts\": [ 14.7382, 14.6112, 14.6315 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:26:10Z",
+          "avg_ns": 6824240094,
+          "stddev_ns": 7715357,
+          "avg_ts": 75.026733,
+          "stddev_ts": 0.08484,
+          "samples_ns": [
+            6816112870,
+            6825143341,
+            6831464071
+          ],
+          "samples_ts": [
+            75.1161,
+            75.0167,
+            74.9473
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:26:37Z",
+          "avg_ns": 8731180353,
+          "stddev_ns": 40507912,
+          "avg_ts": 14.660314,
+          "stddev_ts": 0.06818,
+          "samples_ns": [
+            8684935677,
+            8760382448,
+            8748222934
+          ],
+          "samples_ts": [
+            14.7382,
+            14.6112,
+            14.6315
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 842
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:29:18.525804+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:27:04Z\",\n    \"avg_ns\": 6820518729,\n    \"stddev_ns\": 1538241,\n    \"avg_ts\": 75.067607,\n    \"stddev_ts\": 0.016930,\n    \"samples_ns\": [ 6820354847, 6819068990, 6822132350 ],\n    \"samples_ts\": [ 75.0694, 75.0836, 75.0498 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:27:32Z\",\n    \"avg_ns\": 35378485601,\n    \"stddev_ns\": 175115644,\n    \"avg_ts\": 14.472309,\n    \"stddev_ts\": 0.071824,\n    \"samples_ns\": [ 35455244479, 35502113908, 35178098417 ],\n    \"samples_ts\": [ 14.4407, 14.4217, 14.5545 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:27:04Z",
+          "avg_ns": 6820518729,
+          "stddev_ns": 1538241,
+          "avg_ts": 75.067607,
+          "stddev_ts": 0.01693,
+          "samples_ns": [
+            6820354847,
+            6819068990,
+            6822132350
+          ],
+          "samples_ts": [
+            75.0694,
+            75.0836,
+            75.0498
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:27:32Z",
+          "avg_ns": 35378485601,
+          "stddev_ns": 175115644,
+          "avg_ts": 14.472309,
+          "stddev_ts": 0.071824,
+          "samples_ns": [
+            35455244479,
+            35502113908,
+            35178098417
+          ],
+          "samples_ts": [
+            14.4407,
+            14.4217,
+            14.5545
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 843
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:29:52.956472+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:29:19Z\",\n    \"avg_ns\": 1705169464,\n    \"stddev_ns\": 2978872,\n    \"avg_ts\": 75.066005,\n    \"stddev_ts\": 0.131012,\n    \"samples_ns\": [ 1703155394, 1708591312, 1703761686 ],\n    \"samples_ts\": [ 75.1546, 74.9155, 75.1279 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:29:26Z\",\n    \"avg_ns\": 8806006997,\n    \"stddev_ns\": 40744573,\n    \"avg_ts\": 14.535740,\n    \"stddev_ts\": 0.067435,\n    \"samples_ns\": [ 8758971130, 8830432127, 8828617736 ],\n    \"samples_ts\": [ 14.6136, 14.4953, 14.4983 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:29:19Z",
+          "avg_ns": 1705169464,
+          "stddev_ns": 2978872,
+          "avg_ts": 75.066005,
+          "stddev_ts": 0.131012,
+          "samples_ns": [
+            1703155394,
+            1708591312,
+            1703761686
+          ],
+          "samples_ts": [
+            75.1546,
+            74.9155,
+            75.1279
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:29:26Z",
+          "avg_ns": 8806006997,
+          "stddev_ns": 40744573,
+          "avg_ts": 14.53574,
+          "stddev_ts": 0.067435,
+          "samples_ns": [
+            8758971130,
+            8830432127,
+            8828617736
+          ],
+          "samples_ts": [
+            14.6136,
+            14.4953,
+            14.4983
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 844
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:31:47.725385+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:29:54Z\",\n    \"avg_ns\": 1702659988,\n    \"stddev_ns\": 1046883,\n    \"avg_ts\": 75.176508,\n    \"stddev_ts\": 0.046171,\n    \"samples_ns\": [ 1702189388, 1703858701, 1701931876 ],\n    \"samples_ts\": [ 75.1973, 75.1236, 75.2087 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:30:00Z\",\n    \"avg_ns\": 35572279727,\n    \"stddev_ns\": 88132724,\n    \"avg_ts\": 14.393289,\n    \"stddev_ts\": 0.035705,\n    \"samples_ns\": [ 35636716506, 35471846474, 35608276202 ],\n    \"samples_ts\": [ 14.3672, 14.434, 14.3787 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:29:54Z",
+          "avg_ns": 1702659988,
+          "stddev_ns": 1046883,
+          "avg_ts": 75.176508,
+          "stddev_ts": 0.046171,
+          "samples_ns": [
+            1702189388,
+            1703858701,
+            1701931876
+          ],
+          "samples_ts": [
+            75.1973,
+            75.1236,
+            75.2087
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:30:00Z",
+          "avg_ns": 35572279727,
+          "stddev_ns": 88132724,
+          "avg_ts": 14.393289,
+          "stddev_ts": 0.035705,
+          "samples_ns": [
+            35636716506,
+            35471846474,
+            35608276202
+          ],
+          "samples_ts": [
+            14.3672,
+            14.434,
+            14.3787
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 845
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:32:42.811516+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:31:48Z\",\n    \"avg_ns\": 6877801148,\n    \"stddev_ns\": 11497921,\n    \"avg_ts\": 74.442535,\n    \"stddev_ts\": 0.124412,\n    \"samples_ns\": [ 6889830713, 6876650072, 6866922661 ],\n    \"samples_ts\": [ 74.3124, 74.4549, 74.5603 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:32:16Z\",\n    \"avg_ns\": 8792733280,\n    \"stddev_ns\": 36430622,\n    \"avg_ts\": 14.557643,\n    \"stddev_ts\": 0.060457,\n    \"samples_ns\": [ 8750800547, 8810798635, 8816600659 ],\n    \"samples_ts\": [ 14.6272, 14.5276, 14.5181 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:31:48Z",
+          "avg_ns": 6877801148,
+          "stddev_ns": 11497921,
+          "avg_ts": 74.442535,
+          "stddev_ts": 0.124412,
+          "samples_ns": [
+            6889830713,
+            6876650072,
+            6866922661
+          ],
+          "samples_ts": [
+            74.3124,
+            74.4549,
+            74.5603
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:32:16Z",
+          "avg_ns": 8792733280,
+          "stddev_ns": 36430622,
+          "avg_ts": 14.557643,
+          "stddev_ts": 0.060457,
+          "samples_ns": [
+            8750800547,
+            8810798635,
+            8816600659
+          ],
+          "samples_ts": [
+            14.6272,
+            14.5276,
+            14.5181
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 846
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:34:57.675082+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:32:43Z\",\n    \"avg_ns\": 6882091094,\n    \"stddev_ns\": 5476528,\n    \"avg_ts\": 74.396024,\n    \"stddev_ts\": 0.059193,\n    \"samples_ns\": [ 6887370500, 6876439000, 6882463784 ],\n    \"samples_ts\": [ 74.339, 74.4571, 74.392 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:33:11Z\",\n    \"avg_ns\": 35381200974,\n    \"stddev_ns\": 22885450,\n    \"avg_ts\": 14.470966,\n    \"stddev_ts\": 0.009362,\n    \"samples_ns\": [ 35355957645, 35400586795, 35387058484 ],\n    \"samples_ts\": [ 14.4813, 14.463, 14.4686 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:32:43Z",
+          "avg_ns": 6882091094,
+          "stddev_ns": 5476528,
+          "avg_ts": 74.396024,
+          "stddev_ts": 0.059193,
+          "samples_ns": [
+            6887370500,
+            6876439000,
+            6882463784
+          ],
+          "samples_ts": [
+            74.339,
+            74.4571,
+            74.392
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:33:11Z",
+          "avg_ns": 35381200974,
+          "stddev_ns": 22885450,
+          "avg_ts": 14.470966,
+          "stddev_ts": 0.009362,
+          "samples_ns": [
+            35355957645,
+            35400586795,
+            35387058484
+          ],
+          "samples_ts": [
+            14.4813,
+            14.463,
+            14.4686
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 847
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:35:32.048274+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:34:58Z\",\n    \"avg_ns\": 1707389918,\n    \"stddev_ns\": 3760017,\n    \"avg_ts\": 74.968472,\n    \"stddev_ts\": 0.164922,\n    \"samples_ns\": [ 1711647512, 1705997598, 1704524644 ],\n    \"samples_ts\": [ 74.7818, 75.0294, 75.0943 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:35:05Z\",\n    \"avg_ns\": 8768839936,\n    \"stddev_ns\": 55789777,\n    \"avg_ts\": 14.597537,\n    \"stddev_ts\": 0.093183,\n    \"samples_ns\": [ 8705111993, 8792545939, 8808861877 ],\n    \"samples_ts\": [ 14.704, 14.5578, 14.5308 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:34:58Z",
+          "avg_ns": 1707389918,
+          "stddev_ns": 3760017,
+          "avg_ts": 74.968472,
+          "stddev_ts": 0.164922,
+          "samples_ns": [
+            1711647512,
+            1705997598,
+            1704524644
+          ],
+          "samples_ts": [
+            74.7818,
+            75.0294,
+            75.0943
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:35:05Z",
+          "avg_ns": 8768839936,
+          "stddev_ns": 55789777,
+          "avg_ts": 14.597537,
+          "stddev_ts": 0.093183,
+          "samples_ns": [
+            8705111993,
+            8792545939,
+            8808861877
+          ],
+          "samples_ts": [
+            14.704,
+            14.5578,
+            14.5308
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 848
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:37:26.564418+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:35:33Z\",\n    \"avg_ns\": 1698987138,\n    \"stddev_ns\": 1610716,\n    \"avg_ts\": 75.339050,\n    \"stddev_ts\": 0.071433,\n    \"samples_ns\": [ 1700232430, 1697168815, 1699560170 ],\n    \"samples_ts\": [ 75.2838, 75.4197, 75.3136 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:35:39Z\",\n    \"avg_ns\": 35505628823,\n    \"stddev_ns\": 149301842,\n    \"avg_ts\": 14.420419,\n    \"stddev_ts\": 0.060785,\n    \"samples_ns\": [ 35333368715, 35597743057, 35585774699 ],\n    \"samples_ts\": [ 14.4906, 14.3829, 14.3878 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:35:33Z",
+          "avg_ns": 1698987138,
+          "stddev_ns": 1610716,
+          "avg_ts": 75.33905,
+          "stddev_ts": 0.071433,
+          "samples_ns": [
+            1700232430,
+            1697168815,
+            1699560170
+          ],
+          "samples_ts": [
+            75.2838,
+            75.4197,
+            75.3136
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:35:39Z",
+          "avg_ns": 35505628823,
+          "stddev_ns": 149301842,
+          "avg_ts": 14.420419,
+          "stddev_ts": 0.060785,
+          "samples_ns": [
+            35333368715,
+            35597743057,
+            35585774699
+          ],
+          "samples_ts": [
+            14.4906,
+            14.3829,
+            14.3878
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 849
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:38:22.296209+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:37:27Z\",\n    \"avg_ns\": 7047918344,\n    \"stddev_ns\": 8541211,\n    \"avg_ts\": 72.645635,\n    \"stddev_ts\": 0.087994,\n    \"samples_ns\": [ 7040719964, 7057355577, 7045679492 ],\n    \"samples_ts\": [ 72.7198, 72.5484, 72.6686 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:37:55Z\",\n    \"avg_ns\": 8775972860,\n    \"stddev_ns\": 34187715,\n    \"avg_ts\": 14.585425,\n    \"stddev_ts\": 0.056893,\n    \"samples_ns\": [ 8738465843, 8784062934, 8805389804 ],\n    \"samples_ts\": [ 14.6479, 14.5718, 14.5366 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:37:27Z",
+          "avg_ns": 7047918344,
+          "stddev_ns": 8541211,
+          "avg_ts": 72.645635,
+          "stddev_ts": 0.087994,
+          "samples_ns": [
+            7040719964,
+            7057355577,
+            7045679492
+          ],
+          "samples_ts": [
+            72.7198,
+            72.5484,
+            72.6686
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:37:55Z",
+          "avg_ns": 8775972860,
+          "stddev_ns": 34187715,
+          "avg_ts": 14.585425,
+          "stddev_ts": 0.056893,
+          "samples_ns": [
+            8738465843,
+            8784062934,
+            8805389804
+          ],
+          "samples_ts": [
+            14.6479,
+            14.5718,
+            14.5366
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 850
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:40:38.845529+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:38:23Z\",\n    \"avg_ns\": 7058041566,\n    \"stddev_ns\": 7430388,\n    \"avg_ts\": 72.541423,\n    \"stddev_ts\": 0.076396,\n    \"samples_ns\": [ 7064116669, 7060250295, 7049757735 ],\n    \"samples_ts\": [ 72.479, 72.5187, 72.6266 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:38:51Z\",\n    \"avg_ns\": 35704006022,\n    \"stddev_ns\": 66587611,\n    \"avg_ts\": 14.340161,\n    \"stddev_ts\": 0.026724,\n    \"samples_ns\": [ 35649434492, 35778199139, 35684384437 ],\n    \"samples_ts\": [ 14.3621, 14.3104, 14.348 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:38:23Z",
+          "avg_ns": 7058041566,
+          "stddev_ns": 7430388,
+          "avg_ts": 72.541423,
+          "stddev_ts": 0.076396,
+          "samples_ns": [
+            7064116669,
+            7060250295,
+            7049757735
+          ],
+          "samples_ts": [
+            72.479,
+            72.5187,
+            72.6266
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:38:51Z",
+          "avg_ns": 35704006022,
+          "stddev_ns": 66587611,
+          "avg_ts": 14.340161,
+          "stddev_ts": 0.026724,
+          "samples_ns": [
+            35649434492,
+            35778199139,
+            35684384437
+          ],
+          "samples_ts": [
+            14.3621,
+            14.3104,
+            14.348
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 851
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:41:13.092977+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:40:39Z\",\n    \"avg_ns\": 1702872913,\n    \"stddev_ns\": 4874021,\n    \"avg_ts\": 75.167499,\n    \"stddev_ts\": 0.214786,\n    \"samples_ns\": [ 1700201834, 1699918537, 1708498369 ],\n    \"samples_ts\": [ 75.2852, 75.2977, 74.9196 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:40:46Z\",\n    \"avg_ns\": 8740785847,\n    \"stddev_ns\": 64726893,\n    \"avg_ts\": 14.644529,\n    \"stddev_ts\": 0.108775,\n    \"samples_ns\": [ 8668598388, 8760107874, 8793651280 ],\n    \"samples_ts\": [ 14.7659, 14.6117, 14.556 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:40:39Z",
+          "avg_ns": 1702872913,
+          "stddev_ns": 4874021,
+          "avg_ts": 75.167499,
+          "stddev_ts": 0.214786,
+          "samples_ns": [
+            1700201834,
+            1699918537,
+            1708498369
+          ],
+          "samples_ts": [
+            75.2852,
+            75.2977,
+            74.9196
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:40:46Z",
+          "avg_ns": 8740785847,
+          "stddev_ns": 64726893,
+          "avg_ts": 14.644529,
+          "stddev_ts": 0.108775,
+          "samples_ns": [
+            8668598388,
+            8760107874,
+            8793651280
+          ],
+          "samples_ts": [
+            14.7659,
+            14.6117,
+            14.556
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 852
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:43:07.427211+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:41:14Z\",\n    \"avg_ns\": 1709044594,\n    \"stddev_ns\": 7540822,\n    \"avg_ts\": 74.896617,\n    \"stddev_ts\": 0.329789,\n    \"samples_ns\": [ 1706413096, 1717548320, 1703172368 ],\n    \"samples_ts\": [ 75.0111, 74.5248, 75.1539 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:41:20Z\",\n    \"avg_ns\": 35434463861,\n    \"stddev_ns\": 87299150,\n    \"avg_ts\": 14.449268,\n    \"stddev_ts\": 0.035553,\n    \"samples_ns\": [ 35371256218, 35398062786, 35534072581 ],\n    \"samples_ts\": [ 14.475, 14.4641, 14.4087 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:41:14Z",
+          "avg_ns": 1709044594,
+          "stddev_ns": 7540822,
+          "avg_ts": 74.896617,
+          "stddev_ts": 0.329789,
+          "samples_ns": [
+            1706413096,
+            1717548320,
+            1703172368
+          ],
+          "samples_ts": [
+            75.0111,
+            74.5248,
+            75.1539
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:41:20Z",
+          "avg_ns": 35434463861,
+          "stddev_ns": 87299150,
+          "avg_ts": 14.449268,
+          "stddev_ts": 0.035553,
+          "samples_ns": [
+            35371256218,
+            35398062786,
+            35534072581
+          ],
+          "samples_ts": [
+            14.475,
+            14.4641,
+            14.4087
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 853
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:44:02.169018+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:43:08Z\",\n    \"avg_ns\": 6835114036,\n    \"stddev_ns\": 8360669,\n    \"avg_ts\": 74.907384,\n    \"stddev_ts\": 0.091583,\n    \"samples_ns\": [ 6844399791, 6832758485, 6828183832 ],\n    \"samples_ts\": [ 74.8057, 74.9331, 74.9833 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:43:35Z\",\n    \"avg_ns\": 8732147935,\n    \"stddev_ns\": 74343543,\n    \"avg_ts\": 14.659188,\n    \"stddev_ts\": 0.125056,\n    \"samples_ns\": [ 8652607604, 8799881724, 8743954477 ],\n    \"samples_ts\": [ 14.7932, 14.5457, 14.6387 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:43:08Z",
+          "avg_ns": 6835114036,
+          "stddev_ns": 8360669,
+          "avg_ts": 74.907384,
+          "stddev_ts": 0.091583,
+          "samples_ns": [
+            6844399791,
+            6832758485,
+            6828183832
+          ],
+          "samples_ts": [
+            74.8057,
+            74.9331,
+            74.9833
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:43:35Z",
+          "avg_ns": 8732147935,
+          "stddev_ns": 74343543,
+          "avg_ts": 14.659188,
+          "stddev_ts": 0.125056,
+          "samples_ns": [
+            8652607604,
+            8799881724,
+            8743954477
+          ],
+          "samples_ts": [
+            14.7932,
+            14.5457,
+            14.6387
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 854
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:46:17.045126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:44:03Z\",\n    \"avg_ns\": 6825027489,\n    \"stddev_ns\": 15118639,\n    \"avg_ts\": 75.018258,\n    \"stddev_ts\": 0.166047,\n    \"samples_ns\": [ 6841680845, 6821235760, 6812165863 ],\n    \"samples_ts\": [ 74.8354, 75.0597, 75.1596 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:44:30Z\",\n    \"avg_ns\": 35457235990,\n    \"stddev_ns\": 100981322,\n    \"avg_ts\": 14.440008,\n    \"stddev_ts\": 0.041151,\n    \"samples_ns\": [ 35549749571, 35349512498, 35472445901 ],\n    \"samples_ts\": [ 14.4024, 14.4839, 14.4337 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:44:03Z",
+          "avg_ns": 6825027489,
+          "stddev_ns": 15118639,
+          "avg_ts": 75.018258,
+          "stddev_ts": 0.166047,
+          "samples_ns": [
+            6841680845,
+            6821235760,
+            6812165863
+          ],
+          "samples_ts": [
+            74.8354,
+            75.0597,
+            75.1596
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:44:30Z",
+          "avg_ns": 35457235990,
+          "stddev_ns": 100981322,
+          "avg_ts": 14.440008,
+          "stddev_ts": 0.041151,
+          "samples_ns": [
+            35549749571,
+            35349512498,
+            35472445901
+          ],
+          "samples_ts": [
+            14.4024,
+            14.4839,
+            14.4337
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 855
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:46:51.400101+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:46:18Z\",\n    \"avg_ns\": 1704672303,\n    \"stddev_ns\": 696740,\n    \"avg_ts\": 75.087754,\n    \"stddev_ts\": 0.030575,\n    \"samples_ns\": [ 1705471471, 1704217608, 1704327832 ],\n    \"samples_ts\": [ 75.0526, 75.1078, 75.1029 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:46:24Z\",\n    \"avg_ns\": 8771146678,\n    \"stddev_ns\": 83209663,\n    \"avg_ts\": 14.594183,\n    \"stddev_ts\": 0.139185,\n    \"samples_ns\": [ 8675470201, 8826622928, 8811346907 ],\n    \"samples_ts\": [ 14.7542, 14.5016, 14.5267 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:46:18Z",
+          "avg_ns": 1704672303,
+          "stddev_ns": 696740,
+          "avg_ts": 75.087754,
+          "stddev_ts": 0.030575,
+          "samples_ns": [
+            1705471471,
+            1704217608,
+            1704327832
+          ],
+          "samples_ts": [
+            75.0526,
+            75.1078,
+            75.1029
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:46:24Z",
+          "avg_ns": 8771146678,
+          "stddev_ns": 83209663,
+          "avg_ts": 14.594183,
+          "stddev_ts": 0.139185,
+          "samples_ns": [
+            8675470201,
+            8826622928,
+            8811346907
+          ],
+          "samples_ts": [
+            14.7542,
+            14.5016,
+            14.5267
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 856
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:48:45.923320+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:46:52Z\",\n    \"avg_ns\": 1705163017,\n    \"stddev_ns\": 3627736,\n    \"avg_ts\": 75.066363,\n    \"stddev_ts\": 0.159538,\n    \"samples_ns\": [ 1704016507, 1709224997, 1702247549 ],\n    \"samples_ts\": [ 75.1166, 74.8877, 75.1947 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:46:59Z\",\n    \"avg_ns\": 35501898062,\n    \"stddev_ns\": 21399270,\n    \"avg_ts\": 14.421768,\n    \"stddev_ts\": 0.008696,\n    \"samples_ns\": [ 35511740930, 35477348402, 35516604854 ],\n    \"samples_ts\": [ 14.4178, 14.4317, 14.4158 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:46:52Z",
+          "avg_ns": 1705163017,
+          "stddev_ns": 3627736,
+          "avg_ts": 75.066363,
+          "stddev_ts": 0.159538,
+          "samples_ns": [
+            1704016507,
+            1709224997,
+            1702247549
+          ],
+          "samples_ts": [
+            75.1166,
+            74.8877,
+            75.1947
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:46:59Z",
+          "avg_ns": 35501898062,
+          "stddev_ns": 21399270,
+          "avg_ts": 14.421768,
+          "stddev_ts": 0.008696,
+          "samples_ns": [
+            35511740930,
+            35477348402,
+            35516604854
+          ],
+          "samples_ts": [
+            14.4178,
+            14.4317,
+            14.4158
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 857
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:49:40.935288+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:48:47Z\",\n    \"avg_ns\": 6873250743,\n    \"stddev_ns\": 1390220,\n    \"avg_ts\": 74.491683,\n    \"stddev_ts\": 0.015012,\n    \"samples_ns\": [ 6874842799, 6872320600, 6872588832 ],\n    \"samples_ts\": [ 74.4744, 74.5018, 74.4989 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:49:14Z\",\n    \"avg_ns\": 8762076401,\n    \"stddev_ns\": 45032263,\n    \"avg_ts\": 14.608666,\n    \"stddev_ts\": 0.074867,\n    \"samples_ns\": [ 8732043553, 8740331321, 8813854331 ],\n    \"samples_ts\": [ 14.6587, 14.6448, 14.5226 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:48:47Z",
+          "avg_ns": 6873250743,
+          "stddev_ns": 1390220,
+          "avg_ts": 74.491683,
+          "stddev_ts": 0.015012,
+          "samples_ns": [
+            6874842799,
+            6872320600,
+            6872588832
+          ],
+          "samples_ts": [
+            74.4744,
+            74.5018,
+            74.4989
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:49:14Z",
+          "avg_ns": 8762076401,
+          "stddev_ns": 45032263,
+          "avg_ts": 14.608666,
+          "stddev_ts": 0.074867,
+          "samples_ns": [
+            8732043553,
+            8740331321,
+            8813854331
+          ],
+          "samples_ts": [
+            14.6587,
+            14.6448,
+            14.5226
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 858
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:51:56.233799+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:49:41Z\",\n    \"avg_ns\": 6871918340,\n    \"stddev_ns\": 9940472,\n    \"avg_ts\": 74.506228,\n    \"stddev_ts\": 0.107788,\n    \"samples_ns\": [ 6861588690, 6872750385, 6881415947 ],\n    \"samples_ts\": [ 74.6183, 74.4971, 74.4033 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:50:09Z\",\n    \"avg_ns\": 35535585451,\n    \"stddev_ns\": 65797107,\n    \"avg_ts\": 14.408125,\n    \"stddev_ts\": 0.026685,\n    \"samples_ns\": [ 35541998347, 35466817244, 35597940764 ],\n    \"samples_ts\": [ 14.4055, 14.436, 14.3829 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:49:41Z",
+          "avg_ns": 6871918340,
+          "stddev_ns": 9940472,
+          "avg_ts": 74.506228,
+          "stddev_ts": 0.107788,
+          "samples_ns": [
+            6861588690,
+            6872750385,
+            6881415947
+          ],
+          "samples_ts": [
+            74.6183,
+            74.4971,
+            74.4033
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:50:09Z",
+          "avg_ns": 35535585451,
+          "stddev_ns": 65797107,
+          "avg_ts": 14.408125,
+          "stddev_ts": 0.026685,
+          "samples_ns": [
+            35541998347,
+            35466817244,
+            35597940764
+          ],
+          "samples_ts": [
+            14.4055,
+            14.436,
+            14.3829
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 859
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:52:30.619021+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:51:57Z\",\n    \"avg_ns\": 1706125991,\n    \"stddev_ns\": 1307010,\n    \"avg_ts\": 75.023797,\n    \"stddev_ts\": 0.057499,\n    \"samples_ns\": [ 1706832652, 1704617780, 1706927541 ],\n    \"samples_ts\": [ 74.9927, 75.0901, 74.9885 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:52:04Z\",\n    \"avg_ns\": 8789410808,\n    \"stddev_ns\": 57653092,\n    \"avg_ts\": 14.563396,\n    \"stddev_ts\": 0.095600,\n    \"samples_ns\": [ 8729686196, 8844740708, 8793805521 ],\n    \"samples_ts\": [ 14.6626, 14.4719, 14.5557 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:51:57Z",
+          "avg_ns": 1706125991,
+          "stddev_ns": 1307010,
+          "avg_ts": 75.023797,
+          "stddev_ts": 0.057499,
+          "samples_ns": [
+            1706832652,
+            1704617780,
+            1706927541
+          ],
+          "samples_ts": [
+            74.9927,
+            75.0901,
+            74.9885
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:52:04Z",
+          "avg_ns": 8789410808,
+          "stddev_ns": 57653092,
+          "avg_ts": 14.563396,
+          "stddev_ts": 0.0956,
+          "samples_ns": [
+            8729686196,
+            8844740708,
+            8793805521
+          ],
+          "samples_ts": [
+            14.6626,
+            14.4719,
+            14.5557
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 860
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:54:25.279564+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:52:31Z\",\n    \"avg_ns\": 1701185554,\n    \"stddev_ns\": 1662506,\n    \"avg_ts\": 75.241693,\n    \"stddev_ts\": 0.073444,\n    \"samples_ns\": [ 1703103923, 1700205842, 1700246899 ],\n    \"samples_ts\": [ 75.1569, 75.285, 75.2832 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:52:38Z\",\n    \"avg_ns\": 35542196865,\n    \"stddev_ns\": 160270241,\n    \"avg_ts\": 14.405608,\n    \"stddev_ts\": 0.065039,\n    \"samples_ns\": [ 35368996635, 35572336280, 35685257680 ],\n    \"samples_ts\": [ 14.476, 14.3932, 14.3477 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:52:31Z",
+          "avg_ns": 1701185554,
+          "stddev_ns": 1662506,
+          "avg_ts": 75.241693,
+          "stddev_ts": 0.073444,
+          "samples_ns": [
+            1703103923,
+            1700205842,
+            1700246899
+          ],
+          "samples_ts": [
+            75.1569,
+            75.285,
+            75.2832
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:52:38Z",
+          "avg_ns": 35542196865,
+          "stddev_ns": 160270241,
+          "avg_ts": 14.405608,
+          "stddev_ts": 0.065039,
+          "samples_ns": [
+            35368996635,
+            35572336280,
+            35685257680
+          ],
+          "samples_ts": [
+            14.476,
+            14.3932,
+            14.3477
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 861
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:55:21.064718+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:54:26Z\",\n    \"avg_ns\": 7047304187,\n    \"stddev_ns\": 13987289,\n    \"avg_ts\": 72.652085,\n    \"stddev_ts\": 0.144063,\n    \"samples_ns\": [ 7063084306, 7042393744, 7036434512 ],\n    \"samples_ts\": [ 72.4896, 72.7026, 72.7641 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:54:54Z\",\n    \"avg_ns\": 8800359477,\n    \"stddev_ns\": 11526370,\n    \"avg_ts\": 14.544877,\n    \"stddev_ts\": 0.019038,\n    \"samples_ns\": [ 8796279578, 8813370490, 8791428364 ],\n    \"samples_ts\": [ 14.5516, 14.5234, 14.5596 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:54:26Z",
+          "avg_ns": 7047304187,
+          "stddev_ns": 13987289,
+          "avg_ts": 72.652085,
+          "stddev_ts": 0.144063,
+          "samples_ns": [
+            7063084306,
+            7042393744,
+            7036434512
+          ],
+          "samples_ts": [
+            72.4896,
+            72.7026,
+            72.7641
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:54:54Z",
+          "avg_ns": 8800359477,
+          "stddev_ns": 11526370,
+          "avg_ts": 14.544877,
+          "stddev_ts": 0.019038,
+          "samples_ns": [
+            8796279578,
+            8813370490,
+            8791428364
+          ],
+          "samples_ts": [
+            14.5516,
+            14.5234,
+            14.5596
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 862
+    },
+    {
+      "timestamp_utc": "2025-12-09T15:57:36.934522+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:55:22Z\",\n    \"avg_ns\": 7045823132,\n    \"stddev_ns\": 9642604,\n    \"avg_ts\": 72.667257,\n    \"stddev_ts\": 0.099385,\n    \"samples_ns\": [ 7056725080, 7038412445, 7042331871 ],\n    \"samples_ts\": [ 72.5549, 72.7437, 72.7032 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 1B Q2_K - Medium\",\n    \"model_size\": 683281408,\n    \"model_n_params\": 999885952,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:55:50Z\",\n    \"avg_ns\": 35478633446,\n    \"stddev_ns\": 116109447,\n    \"avg_ts\": 14.431324,\n    \"stddev_ts\": 0.047152,\n    \"samples_ns\": [ 35392342868, 35610642791, 35432914680 ],\n    \"samples_ts\": [ 14.4664, 14.3777, 14.4498 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:55:22Z",
+          "avg_ns": 7045823132,
+          "stddev_ns": 9642604,
+          "avg_ts": 72.667257,
+          "stddev_ts": 0.099385,
+          "samples_ns": [
+            7056725080,
+            7038412445,
+            7042331871
+          ],
+          "samples_ts": [
+            72.5549,
+            72.7437,
+            72.7032
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 1B Q2_K - Medium",
+          "model_size": 683281408,
+          "model_n_params": 999885952,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:55:50Z",
+          "avg_ns": 35478633446,
+          "stddev_ns": 116109447,
+          "avg_ts": 14.431324,
+          "stddev_ts": 0.047152,
+          "samples_ns": [
+            35392342868,
+            35610642791,
+            35432914680
+          ],
+          "samples_ts": [
+            14.4664,
+            14.3777,
+            14.4498
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-1B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 863
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:02:09.439255+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:58:18Z\",\n    \"avg_ns\": 23926395102,\n    \"stddev_ns\": 18874196,\n    \"avg_ts\": 5.349743,\n    \"stddev_ts\": 0.004218,\n    \"samples_ns\": [ 23948114526, 23917095917, 23913974863 ],\n    \"samples_ts\": [ 5.34489, 5.35182, 5.35252 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T15:59:55Z\",\n    \"avg_ns\": 44276647991,\n    \"stddev_ns\": 64056405,\n    \"avg_ts\": 2.890918,\n    \"stddev_ts\": 0.004179,\n    \"samples_ns\": [ 44350594250, 44238196274, 44241153449 ],\n    \"samples_ts\": [ 2.88609, 2.89343, 2.89323 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:58:18Z",
+          "avg_ns": 23926395102,
+          "stddev_ns": 18874196,
+          "avg_ts": 5.349743,
+          "stddev_ts": 0.004218,
+          "samples_ns": [
+            23948114526,
+            23917095917,
+            23913974863
+          ],
+          "samples_ts": [
+            5.34489,
+            5.35182,
+            5.35252
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T15:59:55Z",
+          "avg_ns": 44276647991,
+          "stddev_ns": 64056405,
+          "avg_ts": 2.890918,
+          "stddev_ts": 0.004179,
+          "samples_ns": [
+            44350594250,
+            44238196274,
+            44241153449
+          ],
+          "samples_ts": [
+            2.88609,
+            2.89343,
+            2.89323
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 864
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:12:46.971392+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:02:11Z\",\n    \"avg_ns\": 23938610654,\n    \"stddev_ns\": 14340034,\n    \"avg_ts\": 5.347012,\n    \"stddev_ts\": 0.003202,\n    \"samples_ns\": [ 23955168908, 23930400565, 23930262489 ],\n    \"samples_ts\": [ 5.34331, 5.34884, 5.34888 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:03:47Z\",\n    \"avg_ns\": 179642013862,\n    \"stddev_ns\": 21803139,\n    \"avg_ts\": 2.850113,\n    \"stddev_ts\": 0.000346,\n    \"samples_ns\": [ 179665819101, 179637195041, 179623027445 ],\n    \"samples_ts\": [ 2.84974, 2.85019, 2.85041 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:02:11Z",
+          "avg_ns": 23938610654,
+          "stddev_ns": 14340034,
+          "avg_ts": 5.347012,
+          "stddev_ts": 0.003202,
+          "samples_ns": [
+            23955168908,
+            23930400565,
+            23930262489
+          ],
+          "samples_ts": [
+            5.34331,
+            5.34884,
+            5.34888
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:03:47Z",
+          "avg_ns": 179642013862,
+          "stddev_ns": 21803139,
+          "avg_ts": 2.850113,
+          "stddev_ts": 0.000346,
+          "samples_ns": [
+            179665819101,
+            179637195041,
+            179623027445
+          ],
+          "samples_ts": [
+            2.84974,
+            2.85019,
+            2.85041
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 865
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:21:27.748883+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:12:49Z\",\n    \"avg_ns\": 96307655341,\n    \"stddev_ns\": 4121778,\n    \"avg_ts\": 5.316296,\n    \"stddev_ts\": 0.000227,\n    \"samples_ns\": [ 96303702965, 96311906786, 96307356273 ],\n    \"samples_ts\": [ 5.31651, 5.31606, 5.31631 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:19:14Z\",\n    \"avg_ns\": 44278049516,\n    \"stddev_ns\": 11405763,\n    \"avg_ts\": 2.890823,\n    \"stddev_ts\": 0.000745,\n    \"samples_ns\": [ 44291030235, 44273487143, 44269631170 ],\n    \"samples_ts\": [ 2.88998, 2.89112, 2.89137 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:12:49Z",
+          "avg_ns": 96307655341,
+          "stddev_ns": 4121778,
+          "avg_ts": 5.316296,
+          "stddev_ts": 0.000227,
+          "samples_ns": [
+            96303702965,
+            96311906786,
+            96307356273
+          ],
+          "samples_ts": [
+            5.31651,
+            5.31606,
+            5.31631
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:19:14Z",
+          "avg_ns": 44278049516,
+          "stddev_ns": 11405763,
+          "avg_ts": 2.890823,
+          "stddev_ts": 0.000745,
+          "samples_ns": [
+            44291030235,
+            44273487143,
+            44269631170
+          ],
+          "samples_ts": [
+            2.88998,
+            2.89112,
+            2.89137
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 866
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:36:53.280553+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:21:29Z\",\n    \"avg_ns\": 96287116916,\n    \"stddev_ns\": 1088177,\n    \"avg_ts\": 5.317430,\n    \"stddev_ts\": 0.000058,\n    \"samples_ns\": [ 96288229213, 96286160844, 96286960692 ],\n    \"samples_ts\": [ 5.31737, 5.31748, 5.31744 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:27:55Z\",\n    \"avg_ns\": 179245323571,\n    \"stddev_ns\": 7704227,\n    \"avg_ts\": 2.856420,\n    \"stddev_ts\": 0.000123,\n    \"samples_ns\": [ 179254217236, 179240697417, 179241056060 ],\n    \"samples_ts\": [ 2.85628, 2.85649, 2.85649 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:21:29Z",
+          "avg_ns": 96287116916,
+          "stddev_ns": 1088177,
+          "avg_ts": 5.31743,
+          "stddev_ts": 5.8e-05,
+          "samples_ns": [
+            96288229213,
+            96286160844,
+            96286960692
+          ],
+          "samples_ts": [
+            5.31737,
+            5.31748,
+            5.31744
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:27:55Z",
+          "avg_ns": 179245323571,
+          "stddev_ns": 7704227,
+          "avg_ts": 2.85642,
+          "stddev_ts": 0.000123,
+          "samples_ns": [
+            179254217236,
+            179240697417,
+            179241056060
+          ],
+          "samples_ts": [
+            2.85628,
+            2.85649,
+            2.85649
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 867
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:40:44.442174+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:36:55Z\",\n    \"avg_ns\": 23941538797,\n    \"stddev_ns\": 2130542,\n    \"avg_ts\": 5.346356,\n    \"stddev_ts\": 0.000476,\n    \"samples_ns\": [ 23943785366, 23939547235, 23941283790 ],\n    \"samples_ts\": [ 5.34585, 5.3468, 5.34641 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:38:31Z\",\n    \"avg_ns\": 44257880830,\n    \"stddev_ns\": 5039899,\n    \"avg_ts\": 2.892140,\n    \"stddev_ts\": 0.000329,\n    \"samples_ns\": [ 44263165155, 44257339445, 44253137891 ],\n    \"samples_ts\": [ 2.8918, 2.89218, 2.89245 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:36:55Z",
+          "avg_ns": 23941538797,
+          "stddev_ns": 2130542,
+          "avg_ts": 5.346356,
+          "stddev_ts": 0.000476,
+          "samples_ns": [
+            23943785366,
+            23939547235,
+            23941283790
+          ],
+          "samples_ts": [
+            5.34585,
+            5.3468,
+            5.34641
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:38:31Z",
+          "avg_ns": 44257880830,
+          "stddev_ns": 5039899,
+          "avg_ts": 2.89214,
+          "stddev_ts": 0.000329,
+          "samples_ns": [
+            44263165155,
+            44257339445,
+            44253137891
+          ],
+          "samples_ts": [
+            2.8918,
+            2.89218,
+            2.89245
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 868
+    },
+    {
+      "timestamp_utc": "2025-12-09T16:51:21.010810+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:40:46Z\",\n    \"avg_ns\": 23913470593,\n    \"stddev_ns\": 902287,\n    \"avg_ts\": 5.352632,\n    \"stddev_ts\": 0.000196,\n    \"samples_ns\": [ 23914423866, 23912702841, 23913285074 ],\n    \"samples_ts\": [ 5.35242, 5.3528, 5.35267 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:42:22Z\",\n    \"avg_ns\": 179434857419,\n    \"stddev_ns\": 4160455,\n    \"avg_ts\": 2.853403,\n    \"stddev_ts\": 0.000066,\n    \"samples_ns\": [ 179439017766, 179430740436, 179434814056 ],\n    \"samples_ts\": [ 2.85334, 2.85347, 2.8534 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:40:46Z",
+          "avg_ns": 23913470593,
+          "stddev_ns": 902287,
+          "avg_ts": 5.352632,
+          "stddev_ts": 0.000196,
+          "samples_ns": [
+            23914423866,
+            23912702841,
+            23913285074
+          ],
+          "samples_ts": [
+            5.35242,
+            5.3528,
+            5.35267
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:42:22Z",
+          "avg_ns": 179434857419,
+          "stddev_ns": 4160455,
+          "avg_ts": 2.853403,
+          "stddev_ts": 6.6e-05,
+          "samples_ns": [
+            179439017766,
+            179430740436,
+            179434814056
+          ],
+          "samples_ts": [
+            2.85334,
+            2.85347,
+            2.8534
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 869
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:00:01.975808+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:51:23Z\",\n    \"avg_ns\": 96407592480,\n    \"stddev_ns\": 1112095,\n    \"avg_ts\": 5.310785,\n    \"stddev_ts\": 0.000059,\n    \"samples_ns\": [ 96408342683, 96406369885, 96408064873 ],\n    \"samples_ts\": [ 5.31074, 5.31085, 5.31076 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T16:57:48Z\",\n    \"avg_ns\": 44233231529,\n    \"stddev_ns\": 8757924,\n    \"avg_ts\": 2.893752,\n    \"stddev_ts\": 0.000573,\n    \"samples_ns\": [ 44243336134, 44227896856, 44228461598 ],\n    \"samples_ts\": [ 2.89309, 2.8941, 2.89406 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:51:23Z",
+          "avg_ns": 96407592480,
+          "stddev_ns": 1112095,
+          "avg_ts": 5.310785,
+          "stddev_ts": 5.9e-05,
+          "samples_ns": [
+            96408342683,
+            96406369885,
+            96408064873
+          ],
+          "samples_ts": [
+            5.31074,
+            5.31085,
+            5.31076
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T16:57:48Z",
+          "avg_ns": 44233231529,
+          "stddev_ns": 8757924,
+          "avg_ts": 2.893752,
+          "stddev_ts": 0.000573,
+          "samples_ns": [
+            44243336134,
+            44227896856,
+            44228461598
+          ],
+          "samples_ts": [
+            2.89309,
+            2.8941,
+            2.89406
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 870
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:15:27.833535+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:00:04Z\",\n    \"avg_ns\": 96279076329,\n    \"stddev_ns\": 884432,\n    \"avg_ts\": 5.317874,\n    \"stddev_ts\": 0.000046,\n    \"samples_ns\": [ 96279371218, 96278141026, 96279716744 ],\n    \"samples_ts\": [ 5.31786, 5.31793, 5.31784 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:06:29Z\",\n    \"avg_ns\": 179368379185,\n    \"stddev_ns\": 11032859,\n    \"avg_ts\": 2.854461,\n    \"stddev_ts\": 0.000175,\n    \"samples_ns\": [ 179356672390, 179378542856, 179369922311 ],\n    \"samples_ts\": [ 2.85465, 2.8543, 2.85444 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:00:04Z",
+          "avg_ns": 96279076329,
+          "stddev_ns": 884432,
+          "avg_ts": 5.317874,
+          "stddev_ts": 4.6e-05,
+          "samples_ns": [
+            96279371218,
+            96278141026,
+            96279716744
+          ],
+          "samples_ts": [
+            5.31786,
+            5.31793,
+            5.31784
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:06:29Z",
+          "avg_ns": 179368379185,
+          "stddev_ns": 11032859,
+          "avg_ts": 2.854461,
+          "stddev_ts": 0.000175,
+          "samples_ns": [
+            179356672390,
+            179378542856,
+            179369922311
+          ],
+          "samples_ts": [
+            2.85465,
+            2.8543,
+            2.85444
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 871
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:19:18.918909+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:15:29Z\",\n    \"avg_ns\": 23936372454,\n    \"stddev_ns\": 2854056,\n    \"avg_ts\": 5.347510,\n    \"stddev_ts\": 0.000638,\n    \"samples_ns\": [ 23939191016, 23936442166, 23933484180 ],\n    \"samples_ts\": [ 5.34688, 5.34749, 5.34816 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:17:05Z\",\n    \"avg_ns\": 44243159543,\n    \"stddev_ns\": 3272081,\n    \"avg_ts\": 2.893103,\n    \"stddev_ts\": 0.000214,\n    \"samples_ns\": [ 44246913549, 44240912278, 44241652802 ],\n    \"samples_ts\": [ 2.89286, 2.89325, 2.8932 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:15:29Z",
+          "avg_ns": 23936372454,
+          "stddev_ns": 2854056,
+          "avg_ts": 5.34751,
+          "stddev_ts": 0.000638,
+          "samples_ns": [
+            23939191016,
+            23936442166,
+            23933484180
+          ],
+          "samples_ts": [
+            5.34688,
+            5.34749,
+            5.34816
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:17:05Z",
+          "avg_ns": 44243159543,
+          "stddev_ns": 3272081,
+          "avg_ts": 2.893103,
+          "stddev_ts": 0.000214,
+          "samples_ns": [
+            44246913549,
+            44240912278,
+            44241652802
+          ],
+          "samples_ts": [
+            2.89286,
+            2.89325,
+            2.8932
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 872
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:29:55.933748+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:19:21Z\",\n    \"avg_ns\": 23936516894,\n    \"stddev_ns\": 1152960,\n    \"avg_ts\": 5.347478,\n    \"stddev_ts\": 0.000255,\n    \"samples_ns\": [ 23935702891, 23937823024, 23936024768 ],\n    \"samples_ts\": [ 5.34766, 5.34719, 5.34759 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:20:56Z\",\n    \"avg_ns\": 179547397809,\n    \"stddev_ns\": 11551865,\n    \"avg_ts\": 2.851615,\n    \"stddev_ts\": 0.000183,\n    \"samples_ns\": [ 179560337372, 179538187173, 179543668884 ],\n    \"samples_ts\": [ 2.85141, 2.85176, 2.85167 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:19:21Z",
+          "avg_ns": 23936516894,
+          "stddev_ns": 1152960,
+          "avg_ts": 5.347478,
+          "stddev_ts": 0.000255,
+          "samples_ns": [
+            23935702891,
+            23937823024,
+            23936024768
+          ],
+          "samples_ts": [
+            5.34766,
+            5.34719,
+            5.34759
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:20:56Z",
+          "avg_ns": 179547397809,
+          "stddev_ns": 11551865,
+          "avg_ts": 2.851615,
+          "stddev_ts": 0.000183,
+          "samples_ns": [
+            179560337372,
+            179538187173,
+            179543668884
+          ],
+          "samples_ts": [
+            2.85141,
+            2.85176,
+            2.85167
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 873
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:38:40.742857+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:29:58Z\",\n    \"avg_ns\": 97258851212,\n    \"stddev_ns\": 2753731,\n    \"avg_ts\": 5.264302,\n    \"stddev_ts\": 0.000147,\n    \"samples_ns\": [ 97256104122, 97258909981, 97261539535 ],\n    \"samples_ts\": [ 5.26445, 5.2643, 5.26416 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:36:27Z\",\n    \"avg_ns\": 44385037147,\n    \"stddev_ns\": 13975489,\n    \"avg_ts\": 2.883855,\n    \"stddev_ts\": 0.000908,\n    \"samples_ns\": [ 44401022736, 44378957469, 44375131236 ],\n    \"samples_ts\": [ 2.88282, 2.88425, 2.8845 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:29:58Z",
+          "avg_ns": 97258851212,
+          "stddev_ns": 2753731,
+          "avg_ts": 5.264302,
+          "stddev_ts": 0.000147,
+          "samples_ns": [
+            97256104122,
+            97258909981,
+            97261539535
+          ],
+          "samples_ts": [
+            5.26445,
+            5.2643,
+            5.26416
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:36:27Z",
+          "avg_ns": 44385037147,
+          "stddev_ns": 13975489,
+          "avg_ts": 2.883855,
+          "stddev_ts": 0.000908,
+          "samples_ns": [
+            44401022736,
+            44378957469,
+            44375131236
+          ],
+          "samples_ts": [
+            2.88282,
+            2.88425,
+            2.8845
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 874
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:54:09.682316+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:38:42Z\",\n    \"avg_ns\": 97299134617,\n    \"stddev_ns\": 2169128,\n    \"avg_ts\": 5.262123,\n    \"stddev_ts\": 0.000115,\n    \"samples_ns\": [ 97296988407, 97299180174, 97301235272 ],\n    \"samples_ts\": [ 5.26224, 5.26212, 5.26201 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:45:12Z\",\n    \"avg_ns\": 179018705307,\n    \"stddev_ns\": 9025117,\n    \"avg_ts\": 2.860036,\n    \"stddev_ts\": 0.000144,\n    \"samples_ns\": [ 179027769424, 179018586417, 179009760082 ],\n    \"samples_ts\": [ 2.85989, 2.86004, 2.86018 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:38:42Z",
+          "avg_ns": 97299134617,
+          "stddev_ns": 2169128,
+          "avg_ts": 5.262123,
+          "stddev_ts": 0.000115,
+          "samples_ns": [
+            97296988407,
+            97299180174,
+            97301235272
+          ],
+          "samples_ts": [
+            5.26224,
+            5.26212,
+            5.26201
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:45:12Z",
+          "avg_ns": 179018705307,
+          "stddev_ns": 9025117,
+          "avg_ts": 2.860036,
+          "stddev_ts": 0.000144,
+          "samples_ns": [
+            179027769424,
+            179018586417,
+            179009760082
+          ],
+          "samples_ts": [
+            2.85989,
+            2.86004,
+            2.86018
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 875
+    },
+    {
+      "timestamp_utc": "2025-12-09T17:58:00.918224+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:54:11Z\",\n    \"avg_ns\": 23928868231,\n    \"stddev_ns\": 4487698,\n    \"avg_ts\": 5.349188,\n    \"stddev_ts\": 0.001003,\n    \"samples_ns\": [ 23933987097, 23927006899, 23925610697 ],\n    \"samples_ts\": [ 5.34804, 5.3496, 5.34992 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:55:47Z\",\n    \"avg_ns\": 44287100089,\n    \"stddev_ns\": 4685121,\n    \"avg_ts\": 2.890232,\n    \"stddev_ts\": 0.000305,\n    \"samples_ns\": [ 44284277065, 44284526042, 44292497162 ],\n    \"samples_ts\": [ 2.89042, 2.8904, 2.88988 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:54:11Z",
+          "avg_ns": 23928868231,
+          "stddev_ns": 4487698,
+          "avg_ts": 5.349188,
+          "stddev_ts": 0.001003,
+          "samples_ns": [
+            23933987097,
+            23927006899,
+            23925610697
+          ],
+          "samples_ts": [
+            5.34804,
+            5.3496,
+            5.34992
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:55:47Z",
+          "avg_ns": 44287100089,
+          "stddev_ns": 4685121,
+          "avg_ts": 2.890232,
+          "stddev_ts": 0.000305,
+          "samples_ns": [
+            44284277065,
+            44284526042,
+            44292497162
+          ],
+          "samples_ts": [
+            2.89042,
+            2.8904,
+            2.88988
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 876
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:08:36.701845+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:58:03Z\",\n    \"avg_ns\": 23931422717,\n    \"stddev_ns\": 7527147,\n    \"avg_ts\": 5.348617,\n    \"stddev_ts\": 0.001682,\n    \"samples_ns\": [ 23940112427, 23927050967, 23927104758 ],\n    \"samples_ts\": [ 5.34667, 5.34959, 5.34958 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T17:59:38Z\",\n    \"avg_ns\": 179151424008,\n    \"stddev_ns\": 10111328,\n    \"avg_ts\": 2.857918,\n    \"stddev_ts\": 0.000161,\n    \"samples_ns\": [ 179162298584, 179149667298, 179142306142 ],\n    \"samples_ts\": [ 2.85774, 2.85795, 2.85806 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:58:03Z",
+          "avg_ns": 23931422717,
+          "stddev_ns": 7527147,
+          "avg_ts": 5.348617,
+          "stddev_ts": 0.001682,
+          "samples_ns": [
+            23940112427,
+            23927050967,
+            23927104758
+          ],
+          "samples_ts": [
+            5.34667,
+            5.34959,
+            5.34958
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T17:59:38Z",
+          "avg_ns": 179151424008,
+          "stddev_ns": 10111328,
+          "avg_ts": 2.857918,
+          "stddev_ts": 0.000161,
+          "samples_ns": [
+            179162298584,
+            179149667298,
+            179142306142
+          ],
+          "samples_ts": [
+            2.85774,
+            2.85795,
+            2.85806
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 877
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:17:17.359302+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:08:38Z\",\n    \"avg_ns\": 96291549717,\n    \"stddev_ns\": 2131343,\n    \"avg_ts\": 5.317185,\n    \"stddev_ts\": 0.000118,\n    \"samples_ns\": [ 96292122241, 96289190586, 96293336324 ],\n    \"samples_ts\": [ 5.31715, 5.31732, 5.31709 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:15:04Z\",\n    \"avg_ns\": 44237674951,\n    \"stddev_ns\": 10785292,\n    \"avg_ts\": 2.893461,\n    \"stddev_ts\": 0.000705,\n    \"samples_ns\": [ 44249969803, 44229809972, 44233245078 ],\n    \"samples_ts\": [ 2.89266, 2.89398, 2.89375 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:08:38Z",
+          "avg_ns": 96291549717,
+          "stddev_ns": 2131343,
+          "avg_ts": 5.317185,
+          "stddev_ts": 0.000118,
+          "samples_ns": [
+            96292122241,
+            96289190586,
+            96293336324
+          ],
+          "samples_ts": [
+            5.31715,
+            5.31732,
+            5.31709
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:15:04Z",
+          "avg_ns": 44237674951,
+          "stddev_ns": 10785292,
+          "avg_ts": 2.893461,
+          "stddev_ts": 0.000705,
+          "samples_ns": [
+            44249969803,
+            44229809972,
+            44233245078
+          ],
+          "samples_ts": [
+            2.89266,
+            2.89398,
+            2.89375
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 878
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:32:42.515778+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:17:19Z\",\n    \"avg_ns\": 96275209883,\n    \"stddev_ns\": 1905201,\n    \"avg_ts\": 5.318088,\n    \"stddev_ts\": 0.000102,\n    \"samples_ns\": [ 96277334177, 96274377470, 96273918004 ],\n    \"samples_ts\": [ 5.31797, 5.31813, 5.31816 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:23:44Z\",\n    \"avg_ns\": 179128494829,\n    \"stddev_ns\": 13519931,\n    \"avg_ts\": 2.858283,\n    \"stddev_ts\": 0.000216,\n    \"samples_ns\": [ 179143706627, 179117908794, 179123869068 ],\n    \"samples_ts\": [ 2.85804, 2.85845, 2.85836 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:17:19Z",
+          "avg_ns": 96275209883,
+          "stddev_ns": 1905201,
+          "avg_ts": 5.318088,
+          "stddev_ts": 0.000102,
+          "samples_ns": [
+            96277334177,
+            96274377470,
+            96273918004
+          ],
+          "samples_ts": [
+            5.31797,
+            5.31813,
+            5.31816
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:23:44Z",
+          "avg_ns": 179128494829,
+          "stddev_ns": 13519931,
+          "avg_ts": 2.858283,
+          "stddev_ts": 0.000216,
+          "samples_ns": [
+            179143706627,
+            179117908794,
+            179123869068
+          ],
+          "samples_ts": [
+            2.85804,
+            2.85845,
+            2.85836
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 879
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:36:33.626633+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:32:44Z\",\n    \"avg_ns\": 23924313064,\n    \"stddev_ns\": 1274112,\n    \"avg_ts\": 5.350206,\n    \"stddev_ts\": 0.000285,\n    \"samples_ns\": [ 23925411952, 23922916451, 23924610789 ],\n    \"samples_ts\": [ 5.34996, 5.35052, 5.35014 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:34:20Z\",\n    \"avg_ns\": 44262402842,\n    \"stddev_ns\": 1137824,\n    \"avg_ts\": 2.891845,\n    \"stddev_ts\": 0.000074,\n    \"samples_ns\": [ 44262910867, 44263198152, 44261099507 ],\n    \"samples_ts\": [ 2.89181, 2.89179, 2.89193 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:32:44Z",
+          "avg_ns": 23924313064,
+          "stddev_ns": 1274112,
+          "avg_ts": 5.350206,
+          "stddev_ts": 0.000285,
+          "samples_ns": [
+            23925411952,
+            23922916451,
+            23924610789
+          ],
+          "samples_ts": [
+            5.34996,
+            5.35052,
+            5.35014
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:34:20Z",
+          "avg_ns": 44262402842,
+          "stddev_ns": 1137824,
+          "avg_ts": 2.891845,
+          "stddev_ts": 7.4e-05,
+          "samples_ns": [
+            44262910867,
+            44263198152,
+            44261099507
+          ],
+          "samples_ts": [
+            2.89181,
+            2.89179,
+            2.89193
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 880
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:47:10.254094+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:36:35Z\",\n    \"avg_ns\": 23933944188,\n    \"stddev_ns\": 599202,\n    \"avg_ts\": 5.348053,\n    \"stddev_ts\": 0.000134,\n    \"samples_ns\": [ 23934619514, 23933476159, 23933736891 ],\n    \"samples_ts\": [ 5.3479, 5.34816, 5.3481 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:38:11Z\",\n    \"avg_ns\": 179420545871,\n    \"stddev_ns\": 2672446,\n    \"avg_ts\": 2.853631,\n    \"stddev_ts\": 0.000043,\n    \"samples_ns\": [ 179419775788, 179418343018, 179423518807 ],\n    \"samples_ts\": [ 2.85364, 2.85367, 2.85358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:36:35Z",
+          "avg_ns": 23933944188,
+          "stddev_ns": 599202,
+          "avg_ts": 5.348053,
+          "stddev_ts": 0.000134,
+          "samples_ns": [
+            23934619514,
+            23933476159,
+            23933736891
+          ],
+          "samples_ts": [
+            5.3479,
+            5.34816,
+            5.3481
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:38:11Z",
+          "avg_ns": 179420545871,
+          "stddev_ns": 2672446,
+          "avg_ts": 2.853631,
+          "stddev_ts": 4.3e-05,
+          "samples_ns": [
+            179419775788,
+            179418343018,
+            179423518807
+          ],
+          "samples_ts": [
+            2.85364,
+            2.85367,
+            2.85358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 881
+    },
+    {
+      "timestamp_utc": "2025-12-09T18:55:51.598601+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:47:12Z\",\n    \"avg_ns\": 96463215179,\n    \"stddev_ns\": 1902483,\n    \"avg_ts\": 5.307723,\n    \"stddev_ts\": 0.000102,\n    \"samples_ns\": [ 96463875096, 96461124568, 96464645875 ],\n    \"samples_ts\": [ 5.30769, 5.30784, 5.30764 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:53:38Z\",\n    \"avg_ns\": 44285480691,\n    \"stddev_ns\": 14571532,\n    \"avg_ts\": 2.890338,\n    \"stddev_ts\": 0.000951,\n    \"samples_ns\": [ 44302289230, 44277700960, 44276451884 ],\n    \"samples_ts\": [ 2.88924, 2.89085, 2.89093 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:47:12Z",
+          "avg_ns": 96463215179,
+          "stddev_ns": 1902483,
+          "avg_ts": 5.307723,
+          "stddev_ts": 0.000102,
+          "samples_ns": [
+            96463875096,
+            96461124568,
+            96464645875
+          ],
+          "samples_ts": [
+            5.30769,
+            5.30784,
+            5.30764
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:53:38Z",
+          "avg_ns": 44285480691,
+          "stddev_ns": 14571532,
+          "avg_ts": 2.890338,
+          "stddev_ts": 0.000951,
+          "samples_ns": [
+            44302289230,
+            44277700960,
+            44276451884
+          ],
+          "samples_ts": [
+            2.88924,
+            2.89085,
+            2.89093
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 882
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:11:17.283317+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T18:55:53Z\",\n    \"avg_ns\": 96432866167,\n    \"stddev_ns\": 1408845,\n    \"avg_ts\": 5.309393,\n    \"stddev_ts\": 0.000076,\n    \"samples_ns\": [ 96434426507, 96432335836, 96431836159 ],\n    \"samples_ts\": [ 5.30931, 5.30942, 5.30945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:02:19Z\",\n    \"avg_ns\": 179099517697,\n    \"stddev_ns\": 8301569,\n    \"avg_ts\": 2.858746,\n    \"stddev_ts\": 0.000132,\n    \"samples_ns\": [ 179108842113, 179096685603, 179093025377 ],\n    \"samples_ts\": [ 2.8586, 2.85879, 2.85885 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T18:55:53Z",
+          "avg_ns": 96432866167,
+          "stddev_ns": 1408845,
+          "avg_ts": 5.309393,
+          "stddev_ts": 7.6e-05,
+          "samples_ns": [
+            96434426507,
+            96432335836,
+            96431836159
+          ],
+          "samples_ts": [
+            5.30931,
+            5.30942,
+            5.30945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:02:19Z",
+          "avg_ns": 179099517697,
+          "stddev_ns": 8301569,
+          "avg_ts": 2.858746,
+          "stddev_ts": 0.000132,
+          "samples_ns": [
+            179108842113,
+            179096685603,
+            179093025377
+          ],
+          "samples_ts": [
+            2.8586,
+            2.85879,
+            2.85885
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 883
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:15:08.921198+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:11:19Z\",\n    \"avg_ns\": 23965394952,\n    \"stddev_ns\": 639471,\n    \"avg_ts\": 5.341034,\n    \"stddev_ts\": 0.000134,\n    \"samples_ns\": [ 23965834801, 23965639672, 23964710385 ],\n    \"samples_ts\": [ 5.34094, 5.34098, 5.34119 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:12:55Z\",\n    \"avg_ns\": 44388613908,\n    \"stddev_ns\": 7926416,\n    \"avg_ts\": 2.883622,\n    \"stddev_ts\": 0.000515,\n    \"samples_ns\": [ 44397759830, 44384099194, 44383982702 ],\n    \"samples_ts\": [ 2.88303, 2.88392, 2.88392 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:11:19Z",
+          "avg_ns": 23965394952,
+          "stddev_ns": 639471,
+          "avg_ts": 5.341034,
+          "stddev_ts": 0.000134,
+          "samples_ns": [
+            23965834801,
+            23965639672,
+            23964710385
+          ],
+          "samples_ts": [
+            5.34094,
+            5.34098,
+            5.34119
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:12:55Z",
+          "avg_ns": 44388613908,
+          "stddev_ns": 7926416,
+          "avg_ts": 2.883622,
+          "stddev_ts": 0.000515,
+          "samples_ns": [
+            44397759830,
+            44384099194,
+            44383982702
+          ],
+          "samples_ts": [
+            2.88303,
+            2.88392,
+            2.88392
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 884
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:25:44.582324+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:15:11Z\",\n    \"avg_ns\": 23915116974,\n    \"stddev_ns\": 5617965,\n    \"avg_ts\": 5.352263,\n    \"stddev_ts\": 0.001257,\n    \"samples_ns\": [ 23921591936, 23911573259, 23912185728 ],\n    \"samples_ts\": [ 5.35081, 5.35306, 5.35292 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:16:46Z\",\n    \"avg_ns\": 179124603355,\n    \"stddev_ns\": 7901567,\n    \"avg_ts\": 2.858345,\n    \"stddev_ts\": 0.000126,\n    \"samples_ns\": [ 179133704703, 179119496919, 179120608443 ],\n    \"samples_ts\": [ 2.8582, 2.85843, 2.85841 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:15:11Z",
+          "avg_ns": 23915116974,
+          "stddev_ns": 5617965,
+          "avg_ts": 5.352263,
+          "stddev_ts": 0.001257,
+          "samples_ns": [
+            23921591936,
+            23911573259,
+            23912185728
+          ],
+          "samples_ts": [
+            5.35081,
+            5.35306,
+            5.35292
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:16:46Z",
+          "avg_ns": 179124603355,
+          "stddev_ns": 7901567,
+          "avg_ts": 2.858345,
+          "stddev_ts": 0.000126,
+          "samples_ns": [
+            179133704703,
+            179119496919,
+            179120608443
+          ],
+          "samples_ts": [
+            2.8582,
+            2.85843,
+            2.85841
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 885
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:34:30.399190+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:25:46Z\",\n    \"avg_ns\": 97464845475,\n    \"stddev_ns\": 1251285,\n    \"avg_ts\": 5.253176,\n    \"stddev_ts\": 0.000063,\n    \"samples_ns\": [ 97465147257, 97465835848, 97463553322 ],\n    \"samples_ts\": [ 5.25316, 5.25312, 5.25325 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:32:16Z\",\n    \"avg_ns\": 44394433083,\n    \"stddev_ns\": 20545903,\n    \"avg_ts\": 2.883245,\n    \"stddev_ts\": 0.001334,\n    \"samples_ns\": [ 44418116910, 44383791762, 44381390577 ],\n    \"samples_ts\": [ 2.88171, 2.88394, 2.88409 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:25:46Z",
+          "avg_ns": 97464845475,
+          "stddev_ns": 1251285,
+          "avg_ts": 5.253176,
+          "stddev_ts": 6.3e-05,
+          "samples_ns": [
+            97465147257,
+            97465835848,
+            97463553322
+          ],
+          "samples_ts": [
+            5.25316,
+            5.25312,
+            5.25325
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:32:16Z",
+          "avg_ns": 44394433083,
+          "stddev_ns": 20545903,
+          "avg_ts": 2.883245,
+          "stddev_ts": 0.001334,
+          "samples_ns": [
+            44418116910,
+            44383791762,
+            44381390577
+          ],
+          "samples_ts": [
+            2.88171,
+            2.88394,
+            2.88409
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 886
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:50:00.773723+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:34:32Z\",\n    \"avg_ns\": 97344571814,\n    \"stddev_ns\": 2672130,\n    \"avg_ts\": 5.259667,\n    \"stddev_ts\": 0.000143,\n    \"samples_ns\": [ 97341884271, 97347190642, 97344640530 ],\n    \"samples_ts\": [ 5.25981, 5.25953, 5.25966 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:41:01Z\",\n    \"avg_ns\": 179433467410,\n    \"stddev_ns\": 21511803,\n    \"avg_ts\": 2.853425,\n    \"stddev_ts\": 0.000342,\n    \"samples_ns\": [ 179458300939, 179421274576, 179420826716 ],\n    \"samples_ts\": [ 2.85303, 2.85362, 2.85363 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:34:32Z",
+          "avg_ns": 97344571814,
+          "stddev_ns": 2672130,
+          "avg_ts": 5.259667,
+          "stddev_ts": 0.000143,
+          "samples_ns": [
+            97341884271,
+            97347190642,
+            97344640530
+          ],
+          "samples_ts": [
+            5.25981,
+            5.25953,
+            5.25966
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:41:01Z",
+          "avg_ns": 179433467410,
+          "stddev_ns": 21511803,
+          "avg_ts": 2.853425,
+          "stddev_ts": 0.000342,
+          "samples_ns": [
+            179458300939,
+            179421274576,
+            179420826716
+          ],
+          "samples_ts": [
+            2.85303,
+            2.85362,
+            2.85363
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 887
+    },
+    {
+      "timestamp_utc": "2025-12-09T19:53:51.940544+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:50:02Z\",\n    \"avg_ns\": 23941382358,\n    \"stddev_ns\": 7506551,\n    \"avg_ts\": 5.346392,\n    \"stddev_ts\": 0.001676,\n    \"samples_ns\": [ 23949770066, 23939081408, 23935295600 ],\n    \"samples_ts\": [ 5.34452, 5.34691, 5.34775 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:51:38Z\",\n    \"avg_ns\": 44253930427,\n    \"stddev_ns\": 4735630,\n    \"avg_ts\": 2.892398,\n    \"stddev_ts\": 0.000310,\n    \"samples_ns\": [ 44259279769, 44252237898, 44250273614 ],\n    \"samples_ts\": [ 2.89205, 2.89251, 2.89264 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:50:02Z",
+          "avg_ns": 23941382358,
+          "stddev_ns": 7506551,
+          "avg_ts": 5.346392,
+          "stddev_ts": 0.001676,
+          "samples_ns": [
+            23949770066,
+            23939081408,
+            23935295600
+          ],
+          "samples_ts": [
+            5.34452,
+            5.34691,
+            5.34775
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:51:38Z",
+          "avg_ns": 44253930427,
+          "stddev_ns": 4735630,
+          "avg_ts": 2.892398,
+          "stddev_ts": 0.00031,
+          "samples_ns": [
+            44259279769,
+            44252237898,
+            44250273614
+          ],
+          "samples_ts": [
+            2.89205,
+            2.89251,
+            2.89264
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 888
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:04:27.503885+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:53:54Z\",\n    \"avg_ns\": 23908802780,\n    \"stddev_ns\": 2135412,\n    \"avg_ts\": 5.353677,\n    \"stddev_ts\": 0.000478,\n    \"samples_ns\": [ 23911117477, 23908381380, 23906909483 ],\n    \"samples_ts\": [ 5.35316, 5.35377, 5.3541 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T19:55:29Z\",\n    \"avg_ns\": 179099443408,\n    \"stddev_ns\": 4277748,\n    \"avg_ts\": 2.858747,\n    \"stddev_ts\": 0.000068,\n    \"samples_ns\": [ 179104357756, 179097069436, 179096903033 ],\n    \"samples_ts\": [ 2.85867, 2.85878, 2.85879 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:53:54Z",
+          "avg_ns": 23908802780,
+          "stddev_ns": 2135412,
+          "avg_ts": 5.353677,
+          "stddev_ts": 0.000478,
+          "samples_ns": [
+            23911117477,
+            23908381380,
+            23906909483
+          ],
+          "samples_ts": [
+            5.35316,
+            5.35377,
+            5.3541
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T19:55:29Z",
+          "avg_ns": 179099443408,
+          "stddev_ns": 4277748,
+          "avg_ts": 2.858747,
+          "stddev_ts": 6.8e-05,
+          "samples_ns": [
+            179104357756,
+            179097069436,
+            179096903033
+          ],
+          "samples_ts": [
+            2.85867,
+            2.85878,
+            2.85879
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 889
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:13:08.270670+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:04:29Z\",\n    \"avg_ns\": 96289744323,\n    \"stddev_ns\": 2827684,\n    \"avg_ts\": 5.317285,\n    \"stddev_ts\": 0.000156,\n    \"samples_ns\": [ 96292672109, 96289532170, 96287028690 ],\n    \"samples_ts\": [ 5.31712, 5.3173, 5.31743 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:10:54Z\",\n    \"avg_ns\": 44325419391,\n    \"stddev_ns\": 6799612,\n    \"avg_ts\": 2.887734,\n    \"stddev_ts\": 0.000443,\n    \"samples_ns\": [ 44333208532, 44322353898, 44320695744 ],\n    \"samples_ts\": [ 2.88723, 2.88793, 2.88804 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:04:29Z",
+          "avg_ns": 96289744323,
+          "stddev_ns": 2827684,
+          "avg_ts": 5.317285,
+          "stddev_ts": 0.000156,
+          "samples_ns": [
+            96292672109,
+            96289532170,
+            96287028690
+          ],
+          "samples_ts": [
+            5.31712,
+            5.3173,
+            5.31743
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:10:54Z",
+          "avg_ns": 44325419391,
+          "stddev_ns": 6799612,
+          "avg_ts": 2.887734,
+          "stddev_ts": 0.000443,
+          "samples_ns": [
+            44333208532,
+            44322353898,
+            44320695744
+          ],
+          "samples_ts": [
+            2.88723,
+            2.88793,
+            2.88804
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 890
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:28:32.932699+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:13:10Z\",\n    \"avg_ns\": 96254868033,\n    \"stddev_ns\": 2519135,\n    \"avg_ts\": 5.319211,\n    \"stddev_ts\": 0.000138,\n    \"samples_ns\": [ 96256370081, 96256251882, 96251982137 ],\n    \"samples_ts\": [ 5.31913, 5.31914, 5.31937 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:19:35Z\",\n    \"avg_ns\": 179006076437,\n    \"stddev_ns\": 12168347,\n    \"avg_ts\": 2.860238,\n    \"stddev_ts\": 0.000194,\n    \"samples_ns\": [ 179018909386, 179004615288, 178994704637 ],\n    \"samples_ts\": [ 2.86003, 2.86026, 2.86042 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:13:10Z",
+          "avg_ns": 96254868033,
+          "stddev_ns": 2519135,
+          "avg_ts": 5.319211,
+          "stddev_ts": 0.000138,
+          "samples_ns": [
+            96256370081,
+            96256251882,
+            96251982137
+          ],
+          "samples_ts": [
+            5.31913,
+            5.31914,
+            5.31937
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:19:35Z",
+          "avg_ns": 179006076437,
+          "stddev_ns": 12168347,
+          "avg_ts": 2.860238,
+          "stddev_ts": 0.000194,
+          "samples_ns": [
+            179018909386,
+            179004615288,
+            178994704637
+          ],
+          "samples_ts": [
+            2.86003,
+            2.86026,
+            2.86042
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 891
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:32:24.114942+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:28:35Z\",\n    \"avg_ns\": 23923617491,\n    \"stddev_ns\": 1839369,\n    \"avg_ts\": 5.350361,\n    \"stddev_ts\": 0.000410,\n    \"samples_ns\": [ 23925648343, 23922086227, 23923117904 ],\n    \"samples_ts\": [ 5.34991, 5.3507, 5.35047 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:30:10Z\",\n    \"avg_ns\": 44268575326,\n    \"stddev_ns\": 7524805,\n    \"avg_ts\": 2.891442,\n    \"stddev_ts\": 0.000491,\n    \"samples_ns\": [ 44277247437, 44264599872, 44263878671 ],\n    \"samples_ts\": [ 2.89088, 2.8917, 2.89175 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:28:35Z",
+          "avg_ns": 23923617491,
+          "stddev_ns": 1839369,
+          "avg_ts": 5.350361,
+          "stddev_ts": 0.00041,
+          "samples_ns": [
+            23925648343,
+            23922086227,
+            23923117904
+          ],
+          "samples_ts": [
+            5.34991,
+            5.3507,
+            5.35047
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:30:10Z",
+          "avg_ns": 44268575326,
+          "stddev_ns": 7524805,
+          "avg_ts": 2.891442,
+          "stddev_ts": 0.000491,
+          "samples_ns": [
+            44277247437,
+            44264599872,
+            44263878671
+          ],
+          "samples_ts": [
+            2.89088,
+            2.8917,
+            2.89175
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 892
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:43:01.491121+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:32:26Z\",\n    \"avg_ns\": 23904101576,\n    \"stddev_ns\": 1212607,\n    \"avg_ts\": 5.354730,\n    \"stddev_ts\": 0.000272,\n    \"samples_ns\": [ 23903591246, 23905485940, 23903227542 ],\n    \"samples_ts\": [ 5.35484, 5.35442, 5.35493 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:34:01Z\",\n    \"avg_ns\": 179712890662,\n    \"stddev_ns\": 5151225,\n    \"avg_ts\": 2.848989,\n    \"stddev_ts\": 0.000081,\n    \"samples_ns\": [ 179718796259, 179710074424, 179709801305 ],\n    \"samples_ts\": [ 2.8489, 2.84903, 2.84904 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:32:26Z",
+          "avg_ns": 23904101576,
+          "stddev_ns": 1212607,
+          "avg_ts": 5.35473,
+          "stddev_ts": 0.000272,
+          "samples_ns": [
+            23903591246,
+            23905485940,
+            23903227542
+          ],
+          "samples_ts": [
+            5.35484,
+            5.35442,
+            5.35493
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:34:01Z",
+          "avg_ns": 179712890662,
+          "stddev_ns": 5151225,
+          "avg_ts": 2.848989,
+          "stddev_ts": 8.1e-05,
+          "samples_ns": [
+            179718796259,
+            179710074424,
+            179709801305
+          ],
+          "samples_ts": [
+            2.8489,
+            2.84903,
+            2.84904
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 893
+    },
+    {
+      "timestamp_utc": "2025-12-09T20:51:43.124267+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:43:03Z\",\n    \"avg_ns\": 96422990822,\n    \"stddev_ns\": 4075574,\n    \"avg_ts\": 5.309937,\n    \"stddev_ts\": 0.000224,\n    \"samples_ns\": [ 96427353749, 96422337089, 96419281628 ],\n    \"samples_ts\": [ 5.3097, 5.30997, 5.31014 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:49:29Z\",\n    \"avg_ns\": 44427115407,\n    \"stddev_ns\": 7583801,\n    \"avg_ts\": 2.881123,\n    \"stddev_ts\": 0.000492,\n    \"samples_ns\": [ 44435853167, 44423249222, 44422243832 ],\n    \"samples_ts\": [ 2.88056, 2.88137, 2.88144 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:43:03Z",
+          "avg_ns": 96422990822,
+          "stddev_ns": 4075574,
+          "avg_ts": 5.309937,
+          "stddev_ts": 0.000224,
+          "samples_ns": [
+            96427353749,
+            96422337089,
+            96419281628
+          ],
+          "samples_ts": [
+            5.3097,
+            5.30997,
+            5.31014
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:49:29Z",
+          "avg_ns": 44427115407,
+          "stddev_ns": 7583801,
+          "avg_ts": 2.881123,
+          "stddev_ts": 0.000492,
+          "samples_ns": [
+            44435853167,
+            44423249222,
+            44422243832
+          ],
+          "samples_ts": [
+            2.88056,
+            2.88137,
+            2.88144
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 894
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:07:08.564231+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:51:45Z\",\n    \"avg_ns\": 96356818023,\n    \"stddev_ns\": 607956,\n    \"avg_ts\": 5.313584,\n    \"stddev_ts\": 0.000029,\n    \"samples_ns\": [ 96357299439, 96356892646, 96356261985 ],\n    \"samples_ts\": [ 5.31356, 5.31358, 5.31361 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T20:58:10Z\",\n    \"avg_ns\": 179127190269,\n    \"stddev_ns\": 12429645,\n    \"avg_ts\": 2.858304,\n    \"stddev_ts\": 0.000198,\n    \"samples_ns\": [ 179141430783, 179121560916, 179118579109 ],\n    \"samples_ts\": [ 2.85808, 2.85839, 2.85844 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:51:45Z",
+          "avg_ns": 96356818023,
+          "stddev_ns": 607956,
+          "avg_ts": 5.313584,
+          "stddev_ts": 2.9e-05,
+          "samples_ns": [
+            96357299439,
+            96356892646,
+            96356261985
+          ],
+          "samples_ts": [
+            5.31356,
+            5.31358,
+            5.31361
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T20:58:10Z",
+          "avg_ns": 179127190269,
+          "stddev_ns": 12429645,
+          "avg_ts": 2.858304,
+          "stddev_ts": 0.000198,
+          "samples_ns": [
+            179141430783,
+            179121560916,
+            179118579109
+          ],
+          "samples_ts": [
+            2.85808,
+            2.85839,
+            2.85844
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 895
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:11:00.216323+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:07:10Z\",\n    \"avg_ns\": 23966646809,\n    \"stddev_ns\": 3269832,\n    \"avg_ts\": 5.340756,\n    \"stddev_ts\": 0.000728,\n    \"samples_ns\": [ 23970418072, 23964793152, 23964729204 ],\n    \"samples_ts\": [ 5.33992, 5.34117, 5.34118 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:08:46Z\",\n    \"avg_ns\": 44386621865,\n    \"stddev_ns\": 2605196,\n    \"avg_ts\": 2.883752,\n    \"stddev_ts\": 0.000168,\n    \"samples_ns\": [ 44388648347, 44387510809, 44383706441 ],\n    \"samples_ts\": [ 2.88362, 2.88369, 2.88394 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:07:10Z",
+          "avg_ns": 23966646809,
+          "stddev_ns": 3269832,
+          "avg_ts": 5.340756,
+          "stddev_ts": 0.000728,
+          "samples_ns": [
+            23970418072,
+            23964793152,
+            23964729204
+          ],
+          "samples_ts": [
+            5.33992,
+            5.34117,
+            5.34118
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:08:46Z",
+          "avg_ns": 44386621865,
+          "stddev_ns": 2605196,
+          "avg_ts": 2.883752,
+          "stddev_ts": 0.000168,
+          "samples_ns": [
+            44388648347,
+            44387510809,
+            44383706441
+          ],
+          "samples_ts": [
+            2.88362,
+            2.88369,
+            2.88394
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 896
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:21:35.972928+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:11:02Z\",\n    \"avg_ns\": 23940373764,\n    \"stddev_ns\": 5355864,\n    \"avg_ts\": 5.346617,\n    \"stddev_ts\": 0.001196,\n    \"samples_ns\": [ 23946524904, 23937853079, 23936743309 ],\n    \"samples_ts\": [ 5.34524, 5.34718, 5.34743 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:12:38Z\",\n    \"avg_ns\": 179121414265,\n    \"stddev_ns\": 3347368,\n    \"avg_ts\": 2.858396,\n    \"stddev_ts\": 0.000053,\n    \"samples_ns\": [ 179125171782, 179120196229, 179118874785 ],\n    \"samples_ts\": [ 2.85834, 2.85842, 2.85844 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:11:02Z",
+          "avg_ns": 23940373764,
+          "stddev_ns": 5355864,
+          "avg_ts": 5.346617,
+          "stddev_ts": 0.001196,
+          "samples_ns": [
+            23946524904,
+            23937853079,
+            23936743309
+          ],
+          "samples_ts": [
+            5.34524,
+            5.34718,
+            5.34743
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:12:38Z",
+          "avg_ns": 179121414265,
+          "stddev_ns": 3347368,
+          "avg_ts": 2.858396,
+          "stddev_ts": 5.3e-05,
+          "samples_ns": [
+            179125171782,
+            179120196229,
+            179118874785
+          ],
+          "samples_ts": [
+            2.85834,
+            2.85842,
+            2.85844
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 897
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:30:20.835528+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:21:38Z\",\n    \"avg_ns\": 97328095467,\n    \"stddev_ns\": 2902088,\n    \"avg_ts\": 5.260557,\n    \"stddev_ts\": 0.000156,\n    \"samples_ns\": [ 97331259187, 97327418013, 97325609202 ],\n    \"samples_ts\": [ 5.26039, 5.26059, 5.26069 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:28:07Z\",\n    \"avg_ns\": 44293454488,\n    \"stddev_ns\": 14109627,\n    \"avg_ts\": 2.889818,\n    \"stddev_ts\": 0.000920,\n    \"samples_ns\": [ 44309729914, 44285960559, 44284672991 ],\n    \"samples_ts\": [ 2.88876, 2.89031, 2.89039 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:21:38Z",
+          "avg_ns": 97328095467,
+          "stddev_ns": 2902088,
+          "avg_ts": 5.260557,
+          "stddev_ts": 0.000156,
+          "samples_ns": [
+            97331259187,
+            97327418013,
+            97325609202
+          ],
+          "samples_ts": [
+            5.26039,
+            5.26059,
+            5.26069
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:28:07Z",
+          "avg_ns": 44293454488,
+          "stddev_ns": 14109627,
+          "avg_ts": 2.889818,
+          "stddev_ts": 0.00092,
+          "samples_ns": [
+            44309729914,
+            44285960559,
+            44284672991
+          ],
+          "samples_ts": [
+            2.88876,
+            2.89031,
+            2.89039
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 898
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:45:52.823155+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:30:22Z\",\n    \"avg_ns\": 97372233858,\n    \"stddev_ns\": 1589473,\n    \"avg_ts\": 5.258172,\n    \"stddev_ts\": 0.000086,\n    \"samples_ns\": [ 97371223255, 97374065974, 97371412345 ],\n    \"samples_ts\": [ 5.25823, 5.25807, 5.25822 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:36:52Z\",\n    \"avg_ns\": 179950405021,\n    \"stddev_ns\": 15422490,\n    \"avg_ts\": 2.845228,\n    \"stddev_ts\": 0.000244,\n    \"samples_ns\": [ 179968205347, 179941690743, 179941318974 ],\n    \"samples_ts\": [ 2.84495, 2.84537, 2.84537 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:30:22Z",
+          "avg_ns": 97372233858,
+          "stddev_ns": 1589473,
+          "avg_ts": 5.258172,
+          "stddev_ts": 8.6e-05,
+          "samples_ns": [
+            97371223255,
+            97374065974,
+            97371412345
+          ],
+          "samples_ts": [
+            5.25823,
+            5.25807,
+            5.25822
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:36:52Z",
+          "avg_ns": 179950405021,
+          "stddev_ns": 15422490,
+          "avg_ts": 2.845228,
+          "stddev_ts": 0.000244,
+          "samples_ns": [
+            179968205347,
+            179941690743,
+            179941318974
+          ],
+          "samples_ts": [
+            2.84495,
+            2.84537,
+            2.84537
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 899
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:48:08.409320+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:45:54Z\",\n    \"avg_ns\": 12005013346,\n    \"stddev_ns\": 14572170,\n    \"avg_ts\": 10.662223,\n    \"stddev_ts\": 0.012933,\n    \"samples_ns\": [ 12021772343, 11997937756, 11995329939 ],\n    \"samples_ts\": [ 10.6473, 10.6685, 10.6708 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:46:43Z\",\n    \"avg_ns\": 28343237016,\n    \"stddev_ns\": 9296189,\n    \"avg_ts\": 4.516069,\n    \"stddev_ts\": 0.001481,\n    \"samples_ns\": [ 28353345487, 28341306067, 28335059495 ],\n    \"samples_ts\": [ 4.51446, 4.51638, 4.51737 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:45:54Z",
+          "avg_ns": 12005013346,
+          "stddev_ns": 14572170,
+          "avg_ts": 10.662223,
+          "stddev_ts": 0.012933,
+          "samples_ns": [
+            12021772343,
+            11997937756,
+            11995329939
+          ],
+          "samples_ts": [
+            10.6473,
+            10.6685,
+            10.6708
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:46:43Z",
+          "avg_ns": 28343237016,
+          "stddev_ns": 9296189,
+          "avg_ts": 4.516069,
+          "stddev_ts": 0.001481,
+          "samples_ns": [
+            28353345487,
+            28341306067,
+            28335059495
+          ],
+          "samples_ts": [
+            4.51446,
+            4.51638,
+            4.51737
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 900
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:54:43.331773+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:48:10Z\",\n    \"avg_ns\": 12011192132,\n    \"stddev_ns\": 18332375,\n    \"avg_ts\": 10.656744,\n    \"stddev_ts\": 0.016251,\n    \"samples_ns\": [ 12032236435, 12002646079, 11998693884 ],\n    \"samples_ts\": [ 10.6381, 10.6643, 10.6678 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:48:58Z\",\n    \"avg_ns\": 114780582160,\n    \"stddev_ns\": 2751269,\n    \"avg_ts\": 4.460685,\n    \"stddev_ts\": 0.000107,\n    \"samples_ns\": [ 114779413877, 114778607821, 114783724782 ],\n    \"samples_ts\": [ 4.46073, 4.46076, 4.46056 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:48:10Z",
+          "avg_ns": 12011192132,
+          "stddev_ns": 18332375,
+          "avg_ts": 10.656744,
+          "stddev_ts": 0.016251,
+          "samples_ns": [
+            12032236435,
+            12002646079,
+            11998693884
+          ],
+          "samples_ts": [
+            10.6381,
+            10.6643,
+            10.6678
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:48:58Z",
+          "avg_ns": 114780582160,
+          "stddev_ns": 2751269,
+          "avg_ts": 4.460685,
+          "stddev_ts": 0.000107,
+          "samples_ns": [
+            114779413877,
+            114778607821,
+            114783724782
+          ],
+          "samples_ts": [
+            4.46073,
+            4.46076,
+            4.46056
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 901
+    },
+    {
+      "timestamp_utc": "2025-12-09T21:59:28.607858+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:54:45Z\",\n    \"avg_ns\": 49122107064,\n    \"stddev_ns\": 2092172,\n    \"avg_ts\": 10.423006,\n    \"stddev_ts\": 0.000441,\n    \"samples_ns\": [ 49120025757, 49122108881, 49124186555 ],\n    \"samples_ts\": [ 10.4234, 10.423, 10.4226 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:58:01Z\",\n    \"avg_ns\": 28762703254,\n    \"stddev_ns\": 9047316,\n    \"avg_ts\": 4.450208,\n    \"stddev_ts\": 0.001400,\n    \"samples_ns\": [ 28767182484, 28768633633, 28752293647 ],\n    \"samples_ts\": [ 4.44951, 4.44929, 4.45182 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:54:45Z",
+          "avg_ns": 49122107064,
+          "stddev_ns": 2092172,
+          "avg_ts": 10.423006,
+          "stddev_ts": 0.000441,
+          "samples_ns": [
+            49120025757,
+            49122108881,
+            49124186555
+          ],
+          "samples_ts": [
+            10.4234,
+            10.423,
+            10.4226
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:58:01Z",
+          "avg_ns": 28762703254,
+          "stddev_ns": 9047316,
+          "avg_ts": 4.450208,
+          "stddev_ts": 0.0014,
+          "samples_ns": [
+            28767182484,
+            28768633633,
+            28752293647
+          ],
+          "samples_ts": [
+            4.44951,
+            4.44929,
+            4.45182
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 902
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:08:30.360020+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T21:59:30Z\",\n    \"avg_ns\": 48407617935,\n    \"stddev_ns\": 265583626,\n    \"avg_ts\": 10.577059,\n    \"stddev_ts\": 0.057847,\n    \"samples_ns\": [ 48714287486, 48254233171, 48254333148 ],\n    \"samples_ts\": [ 10.5103, 10.6105, 10.6104 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:02:44Z\",\n    \"avg_ns\": 115095824750,\n    \"stddev_ns\": 19145119,\n    \"avg_ts\": 4.448467,\n    \"stddev_ts\": 0.000740,\n    \"samples_ns\": [ 115093445197, 115116042368, 115077986687 ],\n    \"samples_ts\": [ 4.44856, 4.44769, 4.44916 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T21:59:30Z",
+          "avg_ns": 48407617935,
+          "stddev_ns": 265583626,
+          "avg_ts": 10.577059,
+          "stddev_ts": 0.057847,
+          "samples_ns": [
+            48714287486,
+            48254233171,
+            48254333148
+          ],
+          "samples_ts": [
+            10.5103,
+            10.6105,
+            10.6104
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:02:44Z",
+          "avg_ns": 115095824750,
+          "stddev_ns": 19145119,
+          "avg_ts": 4.448467,
+          "stddev_ts": 0.00074,
+          "samples_ns": [
+            115093445197,
+            115116042368,
+            115077986687
+          ],
+          "samples_ts": [
+            4.44856,
+            4.44769,
+            4.44916
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 903
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:10:45.851648+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:08:32Z\",\n    \"avg_ns\": 11995775124,\n    \"stddev_ns\": 1581563,\n    \"avg_ts\": 10.670424,\n    \"stddev_ts\": 0.001404,\n    \"samples_ns\": [ 11996383813, 11993983678, 11996957882 ],\n    \"samples_ts\": [ 10.6699, 10.672, 10.6694 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:09:20Z\",\n    \"avg_ns\": 28339610839,\n    \"stddev_ns\": 17953258,\n    \"avg_ts\": 4.516648,\n    \"stddev_ts\": 0.002860,\n    \"samples_ns\": [ 28360333336, 28328746526, 28329752655 ],\n    \"samples_ts\": [ 4.51335, 4.51838, 4.51822 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:08:32Z",
+          "avg_ns": 11995775124,
+          "stddev_ns": 1581563,
+          "avg_ts": 10.670424,
+          "stddev_ts": 0.001404,
+          "samples_ns": [
+            11996383813,
+            11993983678,
+            11996957882
+          ],
+          "samples_ts": [
+            10.6699,
+            10.672,
+            10.6694
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:09:20Z",
+          "avg_ns": 28339610839,
+          "stddev_ns": 17953258,
+          "avg_ts": 4.516648,
+          "stddev_ts": 0.00286,
+          "samples_ns": [
+            28360333336,
+            28328746526,
+            28329752655
+          ],
+          "samples_ts": [
+            4.51335,
+            4.51838,
+            4.51822
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 904
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:17:21.299714+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:10:47Z\",\n    \"avg_ns\": 11998754162,\n    \"stddev_ns\": 5103748,\n    \"avg_ts\": 10.667775,\n    \"stddev_ts\": 0.004537,\n    \"samples_ns\": [ 11998318019, 12004061986, 11993882481 ],\n    \"samples_ts\": [ 10.6682, 10.6631, 10.6721 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:11:36Z\",\n    \"avg_ns\": 114980399388,\n    \"stddev_ns\": 8182692,\n    \"avg_ts\": 4.452933,\n    \"stddev_ts\": 0.000317,\n    \"samples_ns\": [ 114978132505, 114989469291, 114973596369 ],\n    \"samples_ts\": [ 4.45302, 4.45258, 4.4532 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:10:47Z",
+          "avg_ns": 11998754162,
+          "stddev_ns": 5103748,
+          "avg_ts": 10.667775,
+          "stddev_ts": 0.004537,
+          "samples_ns": [
+            11998318019,
+            12004061986,
+            11993882481
+          ],
+          "samples_ts": [
+            10.6682,
+            10.6631,
+            10.6721
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:11:36Z",
+          "avg_ns": 114980399388,
+          "stddev_ns": 8182692,
+          "avg_ts": 4.452933,
+          "stddev_ts": 0.000317,
+          "samples_ns": [
+            114978132505,
+            114989469291,
+            114973596369
+          ],
+          "samples_ts": [
+            4.45302,
+            4.45258,
+            4.4532
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 905
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:22:02.513619+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:17:23Z\",\n    \"avg_ns\": 48358580854,\n    \"stddev_ns\": 1099101,\n    \"avg_ts\": 10.587573,\n    \"stddev_ts\": 0.000241,\n    \"samples_ns\": [ 48357395725, 48359566643, 48358780194 ],\n    \"samples_ts\": [ 10.5878, 10.5874, 10.5875 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:20:36Z\",\n    \"avg_ns\": 28415863293,\n    \"stddev_ns\": 15495783,\n    \"avg_ts\": 4.504527,\n    \"stddev_ts\": 0.002456,\n    \"samples_ns\": [ 28433574147, 28404808970, 28409206763 ],\n    \"samples_ts\": [ 4.50172, 4.50628, 4.50558 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:17:23Z",
+          "avg_ns": 48358580854,
+          "stddev_ns": 1099101,
+          "avg_ts": 10.587573,
+          "stddev_ts": 0.000241,
+          "samples_ns": [
+            48357395725,
+            48359566643,
+            48358780194
+          ],
+          "samples_ts": [
+            10.5878,
+            10.5874,
+            10.5875
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:20:36Z",
+          "avg_ns": 28415863293,
+          "stddev_ns": 15495783,
+          "avg_ts": 4.504527,
+          "stddev_ts": 0.002456,
+          "samples_ns": [
+            28433574147,
+            28404808970,
+            28409206763
+          ],
+          "samples_ts": [
+            4.50172,
+            4.50628,
+            4.50558
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 906
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:31:03.116622+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:22:04Z\",\n    \"avg_ns\": 48372975254,\n    \"stddev_ns\": 1489007,\n    \"avg_ts\": 10.584422,\n    \"stddev_ts\": 0.000326,\n    \"samples_ns\": [ 48373165284, 48371400354, 48374360124 ],\n    \"samples_ts\": [ 10.5844, 10.5848, 10.5841 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:25:18Z\",\n    \"avg_ns\": 114854959082,\n    \"stddev_ns\": 77351896,\n    \"avg_ts\": 4.457798,\n    \"stddev_ts\": 0.003001,\n    \"samples_ns\": [ 114944049381, 114815926656, 114804901210 ],\n    \"samples_ts\": [ 4.45434, 4.45931, 4.45974 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:22:04Z",
+          "avg_ns": 48372975254,
+          "stddev_ns": 1489007,
+          "avg_ts": 10.584422,
+          "stddev_ts": 0.000326,
+          "samples_ns": [
+            48373165284,
+            48371400354,
+            48374360124
+          ],
+          "samples_ts": [
+            10.5844,
+            10.5848,
+            10.5841
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:25:18Z",
+          "avg_ns": 114854959082,
+          "stddev_ns": 77351896,
+          "avg_ts": 4.457798,
+          "stddev_ts": 0.003001,
+          "samples_ns": [
+            114944049381,
+            114815926656,
+            114804901210
+          ],
+          "samples_ts": [
+            4.45434,
+            4.45931,
+            4.45974
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 907
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:33:18.648481+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:31:05Z\",\n    \"avg_ns\": 12005019570,\n    \"stddev_ns\": 11376126,\n    \"avg_ts\": 10.662213,\n    \"stddev_ts\": 0.010097,\n    \"samples_ns\": [ 11997748221, 12018128252, 11999182239 ],\n    \"samples_ts\": [ 10.6687, 10.6506, 10.6674 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:31:53Z\",\n    \"avg_ns\": 28349310167,\n    \"stddev_ns\": 7904312,\n    \"avg_ts\": 4.515101,\n    \"stddev_ts\": 0.001258,\n    \"samples_ns\": [ 28358212997, 28343125397, 28346592108 ],\n    \"samples_ts\": [ 4.51368, 4.51609, 4.51553 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:31:05Z",
+          "avg_ns": 12005019570,
+          "stddev_ns": 11376126,
+          "avg_ts": 10.662213,
+          "stddev_ts": 0.010097,
+          "samples_ns": [
+            11997748221,
+            12018128252,
+            11999182239
+          ],
+          "samples_ts": [
+            10.6687,
+            10.6506,
+            10.6674
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:31:53Z",
+          "avg_ns": 28349310167,
+          "stddev_ns": 7904312,
+          "avg_ts": 4.515101,
+          "stddev_ts": 0.001258,
+          "samples_ns": [
+            28358212997,
+            28343125397,
+            28346592108
+          ],
+          "samples_ts": [
+            4.51368,
+            4.51609,
+            4.51553
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 908
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:39:54.859264+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:33:20Z\",\n    \"avg_ns\": 12005322055,\n    \"stddev_ns\": 10335070,\n    \"avg_ts\": 10.661943,\n    \"stddev_ts\": 0.009181,\n    \"samples_ns\": [ 12008261292, 12013867942, 11993836933 ],\n    \"samples_ts\": [ 10.6593, 10.6544, 10.6721 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:34:08Z\",\n    \"avg_ns\": 115229283101,\n    \"stddev_ns\": 33030550,\n    \"avg_ts\": 4.443315,\n    \"stddev_ts\": 0.001273,\n    \"samples_ns\": [ 115263683040, 115197825853, 115226340412 ],\n    \"samples_ts\": [ 4.44199, 4.44453, 4.44343 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:33:20Z",
+          "avg_ns": 12005322055,
+          "stddev_ns": 10335070,
+          "avg_ts": 10.661943,
+          "stddev_ts": 0.009181,
+          "samples_ns": [
+            12008261292,
+            12013867942,
+            11993836933
+          ],
+          "samples_ts": [
+            10.6593,
+            10.6544,
+            10.6721
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:34:08Z",
+          "avg_ns": 115229283101,
+          "stddev_ns": 33030550,
+          "avg_ts": 4.443315,
+          "stddev_ts": 0.001273,
+          "samples_ns": [
+            115263683040,
+            115197825853,
+            115226340412
+          ],
+          "samples_ts": [
+            4.44199,
+            4.44453,
+            4.44343
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 909
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:44:38.550568+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:39:57Z\",\n    \"avg_ns\": 49014850707,\n    \"stddev_ns\": 4496270,\n    \"avg_ts\": 10.445814,\n    \"stddev_ts\": 0.000956,\n    \"samples_ns\": [ 49009718356, 49016814681, 49018019086 ],\n    \"samples_ts\": [ 10.4469, 10.4454, 10.4451 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:43:13Z\",\n    \"avg_ns\": 28356443617,\n    \"stddev_ns\": 16794906,\n    \"avg_ts\": 4.513966,\n    \"stddev_ts\": 0.002672,\n    \"samples_ns\": [ 28375755600, 28348303435, 28345271818 ],\n    \"samples_ts\": [ 4.51089, 4.51526, 4.51574 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:39:57Z",
+          "avg_ns": 49014850707,
+          "stddev_ns": 4496270,
+          "avg_ts": 10.445814,
+          "stddev_ts": 0.000956,
+          "samples_ns": [
+            49009718356,
+            49016814681,
+            49018019086
+          ],
+          "samples_ts": [
+            10.4469,
+            10.4454,
+            10.4451
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:43:13Z",
+          "avg_ns": 28356443617,
+          "stddev_ns": 16794906,
+          "avg_ts": 4.513966,
+          "stddev_ts": 0.002672,
+          "samples_ns": [
+            28375755600,
+            28348303435,
+            28345271818
+          ],
+          "samples_ts": [
+            4.51089,
+            4.51526,
+            4.51574
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 910
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:53:41.945364+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:44:40Z\",\n    \"avg_ns\": 49069651736,\n    \"stddev_ns\": 935562,\n    \"avg_ts\": 10.434148,\n    \"stddev_ts\": 0.000199,\n    \"samples_ns\": [ 49070618211, 49068750501, 49069586496 ],\n    \"samples_ts\": [ 10.4339, 10.4343, 10.4342 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:47:57Z\",\n    \"avg_ns\": 114844474614,\n    \"stddev_ns\": 66355497,\n    \"avg_ts\": 4.458204,\n    \"stddev_ts\": 0.002575,\n    \"samples_ns\": [ 114920956775, 114802273514, 114810193555 ],\n    \"samples_ts\": [ 4.45524, 4.45984, 4.45953 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:44:40Z",
+          "avg_ns": 49069651736,
+          "stddev_ns": 935562,
+          "avg_ts": 10.434148,
+          "stddev_ts": 0.000199,
+          "samples_ns": [
+            49070618211,
+            49068750501,
+            49069586496
+          ],
+          "samples_ts": [
+            10.4339,
+            10.4343,
+            10.4342
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:47:57Z",
+          "avg_ns": 114844474614,
+          "stddev_ns": 66355497,
+          "avg_ts": 4.458204,
+          "stddev_ts": 0.002575,
+          "samples_ns": [
+            114920956775,
+            114802273514,
+            114810193555
+          ],
+          "samples_ts": [
+            4.45524,
+            4.45984,
+            4.45953
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 911
+    },
+    {
+      "timestamp_utc": "2025-12-09T22:55:58.404827+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:53:44Z\",\n    \"avg_ns\": 12022089183,\n    \"stddev_ns\": 22006635,\n    \"avg_ts\": 10.647092,\n    \"stddev_ts\": 0.019470,\n    \"samples_ns\": [ 12047377794, 12011600018, 12007289738 ],\n    \"samples_ts\": [ 10.6247, 10.6564, 10.6602 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:54:32Z\",\n    \"avg_ns\": 28570351584,\n    \"stddev_ns\": 5668389,\n    \"avg_ts\": 4.480169,\n    \"stddev_ts\": 0.000889,\n    \"samples_ns\": [ 28569839725, 28576258543, 28564956484 ],\n    \"samples_ts\": [ 4.48025, 4.47924, 4.48102 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:53:44Z",
+          "avg_ns": 12022089183,
+          "stddev_ns": 22006635,
+          "avg_ts": 10.647092,
+          "stddev_ts": 0.01947,
+          "samples_ns": [
+            12047377794,
+            12011600018,
+            12007289738
+          ],
+          "samples_ts": [
+            10.6247,
+            10.6564,
+            10.6602
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:54:32Z",
+          "avg_ns": 28570351584,
+          "stddev_ns": 5668389,
+          "avg_ts": 4.480169,
+          "stddev_ts": 0.000889,
+          "samples_ns": [
+            28569839725,
+            28576258543,
+            28564956484
+          ],
+          "samples_ts": [
+            4.48025,
+            4.47924,
+            4.48102
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 912
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:02:33.031083+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:56:00Z\",\n    \"avg_ns\": 11995983621,\n    \"stddev_ns\": 3160088,\n    \"avg_ts\": 10.670238,\n    \"stddev_ts\": 0.002807,\n    \"samples_ns\": [ 11999284854, 11992995633, 11995670378 ],\n    \"samples_ts\": [ 10.6673, 10.6729, 10.6705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T22:56:48Z\",\n    \"avg_ns\": 114653884513,\n    \"stddev_ns\": 9630220,\n    \"avg_ts\": 4.465614,\n    \"stddev_ts\": 0.000375,\n    \"samples_ns\": [ 114664903643, 114647125716, 114649624181 ],\n    \"samples_ts\": [ 4.46518, 4.46588, 4.46578 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:56:00Z",
+          "avg_ns": 11995983621,
+          "stddev_ns": 3160088,
+          "avg_ts": 10.670238,
+          "stddev_ts": 0.002807,
+          "samples_ns": [
+            11999284854,
+            11992995633,
+            11995670378
+          ],
+          "samples_ts": [
+            10.6673,
+            10.6729,
+            10.6705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T22:56:48Z",
+          "avg_ns": 114653884513,
+          "stddev_ns": 9630220,
+          "avg_ts": 4.465614,
+          "stddev_ts": 0.000375,
+          "samples_ns": [
+            114664903643,
+            114647125716,
+            114649624181
+          ],
+          "samples_ts": [
+            4.46518,
+            4.46588,
+            4.46578
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 913
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:07:13.488679+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:02:35Z\",\n    \"avg_ns\": 48231199345,\n    \"stddev_ns\": 3058216,\n    \"avg_ts\": 10.615535,\n    \"stddev_ts\": 0.000670,\n    \"samples_ns\": [ 48234489874, 48228488462, 48230619701 ],\n    \"samples_ts\": [ 10.6148, 10.6161, 10.6157 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:05:48Z\",\n    \"avg_ns\": 28302473770,\n    \"stddev_ns\": 11589990,\n    \"avg_ts\": 4.522573,\n    \"stddev_ts\": 0.001852,\n    \"samples_ns\": [ 28314473257, 28301600481, 28291347574 ],\n    \"samples_ts\": [ 4.52066, 4.52271, 4.52435 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:02:35Z",
+          "avg_ns": 48231199345,
+          "stddev_ns": 3058216,
+          "avg_ts": 10.615535,
+          "stddev_ts": 0.00067,
+          "samples_ns": [
+            48234489874,
+            48228488462,
+            48230619701
+          ],
+          "samples_ts": [
+            10.6148,
+            10.6161,
+            10.6157
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:05:48Z",
+          "avg_ns": 28302473770,
+          "stddev_ns": 11589990,
+          "avg_ts": 4.522573,
+          "stddev_ts": 0.001852,
+          "samples_ns": [
+            28314473257,
+            28301600481,
+            28291347574
+          ],
+          "samples_ts": [
+            4.52066,
+            4.52271,
+            4.52435
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 914
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:16:13.276899+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:07:15Z\",\n    \"avg_ns\": 48258999868,\n    \"stddev_ns\": 8898116,\n    \"avg_ts\": 10.609420,\n    \"stddev_ts\": 0.001956,\n    \"samples_ns\": [ 48269227880, 48254732801, 48253038923 ],\n    \"samples_ts\": [ 10.6072, 10.6104, 10.6107 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:10:28Z\",\n    \"avg_ns\": 114729516204,\n    \"stddev_ns\": 17541889,\n    \"avg_ts\": 4.462670,\n    \"stddev_ts\": 0.000682,\n    \"samples_ns\": [ 114748136990, 114727109985, 114713301637 ],\n    \"samples_ts\": [ 4.46195, 4.46276, 4.4633 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:07:15Z",
+          "avg_ns": 48258999868,
+          "stddev_ns": 8898116,
+          "avg_ts": 10.60942,
+          "stddev_ts": 0.001956,
+          "samples_ns": [
+            48269227880,
+            48254732801,
+            48253038923
+          ],
+          "samples_ts": [
+            10.6072,
+            10.6104,
+            10.6107
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:10:28Z",
+          "avg_ns": 114729516204,
+          "stddev_ns": 17541889,
+          "avg_ts": 4.46267,
+          "stddev_ts": 0.000682,
+          "samples_ns": [
+            114748136990,
+            114727109985,
+            114713301637
+          ],
+          "samples_ts": [
+            4.46195,
+            4.46276,
+            4.4633
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 915
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:18:28.765837+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:16:15Z\",\n    \"avg_ns\": 11997851062,\n    \"stddev_ns\": 6288462,\n    \"avg_ts\": 10.668579,\n    \"stddev_ts\": 0.005590,\n    \"samples_ns\": [ 11997075070, 11991987567, 12004490550 ],\n    \"samples_ts\": [ 10.6693, 10.6738, 10.6627 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:17:03Z\",\n    \"avg_ns\": 28334418958,\n    \"stddev_ns\": 4007620,\n    \"avg_ts\": 4.517474,\n    \"stddev_ts\": 0.000638,\n    \"samples_ns\": [ 28334968572, 28330168459, 28338119844 ],\n    \"samples_ts\": [ 4.51739, 4.51815, 4.51688 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:16:15Z",
+          "avg_ns": 11997851062,
+          "stddev_ns": 6288462,
+          "avg_ts": 10.668579,
+          "stddev_ts": 0.00559,
+          "samples_ns": [
+            11997075070,
+            11991987567,
+            12004490550
+          ],
+          "samples_ts": [
+            10.6693,
+            10.6738,
+            10.6627
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:17:03Z",
+          "avg_ns": 28334418958,
+          "stddev_ns": 4007620,
+          "avg_ts": 4.517474,
+          "stddev_ts": 0.000638,
+          "samples_ns": [
+            28334968572,
+            28330168459,
+            28338119844
+          ],
+          "samples_ts": [
+            4.51739,
+            4.51815,
+            4.51688
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 916
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:25:03.358982+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:18:30Z\",\n    \"avg_ns\": 12005656915,\n    \"stddev_ns\": 4955498,\n    \"avg_ts\": 10.661642,\n    \"stddev_ts\": 0.004400,\n    \"samples_ns\": [ 12003214342, 12011359534, 12002396869 ],\n    \"samples_ts\": [ 10.6638, 10.6566, 10.6645 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:19:18Z\",\n    \"avg_ns\": 114673407976,\n    \"stddev_ns\": 85611917,\n    \"avg_ts\": 4.464855,\n    \"stddev_ts\": 0.003332,\n    \"samples_ns\": [ 114631903421, 114616459357, 114771861150 ],\n    \"samples_ts\": [ 4.46647, 4.46707, 4.46102 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:18:30Z",
+          "avg_ns": 12005656915,
+          "stddev_ns": 4955498,
+          "avg_ts": 10.661642,
+          "stddev_ts": 0.0044,
+          "samples_ns": [
+            12003214342,
+            12011359534,
+            12002396869
+          ],
+          "samples_ts": [
+            10.6638,
+            10.6566,
+            10.6645
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:19:18Z",
+          "avg_ns": 114673407976,
+          "stddev_ns": 85611917,
+          "avg_ts": 4.464855,
+          "stddev_ts": 0.003332,
+          "samples_ns": [
+            114631903421,
+            114616459357,
+            114771861150
+          ],
+          "samples_ts": [
+            4.46647,
+            4.46707,
+            4.46102
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 917
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:29:44.448054+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:25:05Z\",\n    \"avg_ns\": 48376295380,\n    \"stddev_ns\": 1580716,\n    \"avg_ts\": 10.583696,\n    \"stddev_ts\": 0.000346,\n    \"samples_ns\": [ 48378064422, 48375800132, 48375021586 ],\n    \"samples_ts\": [ 10.5833, 10.5838, 10.584 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:28:19Z\",\n    \"avg_ns\": 28342997982,\n    \"stddev_ns\": 19049752,\n    \"avg_ts\": 4.516108,\n    \"stddev_ts\": 0.003034,\n    \"samples_ns\": [ 28364809212, 28334549822, 28329634914 ],\n    \"samples_ts\": [ 4.51263, 4.51745, 4.51824 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:25:05Z",
+          "avg_ns": 48376295380,
+          "stddev_ns": 1580716,
+          "avg_ts": 10.583696,
+          "stddev_ts": 0.000346,
+          "samples_ns": [
+            48378064422,
+            48375800132,
+            48375021586
+          ],
+          "samples_ts": [
+            10.5833,
+            10.5838,
+            10.584
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:28:19Z",
+          "avg_ns": 28342997982,
+          "stddev_ns": 19049752,
+          "avg_ts": 4.516108,
+          "stddev_ts": 0.003034,
+          "samples_ns": [
+            28364809212,
+            28334549822,
+            28329634914
+          ],
+          "samples_ts": [
+            4.51263,
+            4.51745,
+            4.51824
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 918
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:38:52.295726+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:29:46Z\",\n    \"avg_ns\": 49173204615,\n    \"stddev_ns\": 4867625,\n    \"avg_ts\": 10.412175,\n    \"stddev_ts\": 0.001031,\n    \"samples_ns\": [ 49178557154, 49172013783, 49169042908 ],\n    \"samples_ts\": [ 10.411, 10.4124, 10.4131 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:33:03Z\",\n    \"avg_ns\": 116296343628,\n    \"stddev_ns\": 738068183,\n    \"avg_ts\": 4.402664,\n    \"stddev_ts\": 0.027935,\n    \"samples_ns\": [ 117043297271, 116278239455, 115567494159 ],\n    \"samples_ts\": [ 4.37445, 4.40323, 4.43031 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:29:46Z",
+          "avg_ns": 49173204615,
+          "stddev_ns": 4867625,
+          "avg_ts": 10.412175,
+          "stddev_ts": 0.001031,
+          "samples_ns": [
+            49178557154,
+            49172013783,
+            49169042908
+          ],
+          "samples_ts": [
+            10.411,
+            10.4124,
+            10.4131
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:33:03Z",
+          "avg_ns": 116296343628,
+          "stddev_ns": 738068183,
+          "avg_ts": 4.402664,
+          "stddev_ts": 0.027935,
+          "samples_ns": [
+            117043297271,
+            116278239455,
+            115567494159
+          ],
+          "samples_ts": [
+            4.37445,
+            4.40323,
+            4.43031
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 919
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:41:07.873639+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:38:54Z\",\n    \"avg_ns\": 12001729814,\n    \"stddev_ns\": 12186076,\n    \"avg_ts\": 10.665137,\n    \"stddev_ts\": 0.010822,\n    \"samples_ns\": [ 12015762997, 11995595722, 11993830725 ],\n    \"samples_ts\": [ 10.6527, 10.6706, 10.6722 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:39:42Z\",\n    \"avg_ns\": 28364675275,\n    \"stddev_ns\": 3071581,\n    \"avg_ts\": 4.512655,\n    \"stddev_ts\": 0.000487,\n    \"samples_ns\": [ 28361162214, 28366082975, 28366780638 ],\n    \"samples_ts\": [ 4.51321, 4.51243, 4.51232 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:38:54Z",
+          "avg_ns": 12001729814,
+          "stddev_ns": 12186076,
+          "avg_ts": 10.665137,
+          "stddev_ts": 0.010822,
+          "samples_ns": [
+            12015762997,
+            11995595722,
+            11993830725
+          ],
+          "samples_ts": [
+            10.6527,
+            10.6706,
+            10.6722
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:39:42Z",
+          "avg_ns": 28364675275,
+          "stddev_ns": 3071581,
+          "avg_ts": 4.512655,
+          "stddev_ts": 0.000487,
+          "samples_ns": [
+            28361162214,
+            28366082975,
+            28366780638
+          ],
+          "samples_ts": [
+            4.51321,
+            4.51243,
+            4.51232
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 920
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:47:43.142066+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:41:10Z\",\n    \"avg_ns\": 12050426461,\n    \"stddev_ns\": 77259518,\n    \"avg_ts\": 10.622321,\n    \"stddev_ts\": 0.067876,\n    \"samples_ns\": [ 12138711111, 12017392089, 11995176184 ],\n    \"samples_ts\": [ 10.5448, 10.6512, 10.671 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:41:58Z\",\n    \"avg_ns\": 114799241407,\n    \"stddev_ns\": 18541510,\n    \"avg_ts\": 4.459960,\n    \"stddev_ts\": 0.000720,\n    \"samples_ns\": [ 114818940631, 114782137262, 114796646329 ],\n    \"samples_ts\": [ 4.45919, 4.46062, 4.46006 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:41:10Z",
+          "avg_ns": 12050426461,
+          "stddev_ns": 77259518,
+          "avg_ts": 10.622321,
+          "stddev_ts": 0.067876,
+          "samples_ns": [
+            12138711111,
+            12017392089,
+            11995176184
+          ],
+          "samples_ts": [
+            10.5448,
+            10.6512,
+            10.671
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:41:58Z",
+          "avg_ns": 114799241407,
+          "stddev_ns": 18541510,
+          "avg_ts": 4.45996,
+          "stddev_ts": 0.00072,
+          "samples_ns": [
+            114818940631,
+            114782137262,
+            114796646329
+          ],
+          "samples_ts": [
+            4.45919,
+            4.46062,
+            4.46006
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 921
+    },
+    {
+      "timestamp_utc": "2025-12-09T23:52:26.790004+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:47:45Z\",\n    \"avg_ns\": 49033924976,\n    \"stddev_ns\": 1394967,\n    \"avg_ts\": 10.441750,\n    \"stddev_ts\": 0.000289,\n    \"samples_ns\": [ 49035476085, 49033357785, 49032941060 ],\n    \"samples_ts\": [ 10.4414, 10.4419, 10.442 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:51:01Z\",\n    \"avg_ns\": 28320462605,\n    \"stddev_ns\": 10911613,\n    \"avg_ts\": 4.519701,\n    \"stddev_ts\": 0.001741,\n    \"samples_ns\": [ 28329616565, 28323381542, 28308389709 ],\n    \"samples_ts\": [ 4.51824, 4.51923, 4.52163 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:47:45Z",
+          "avg_ns": 49033924976,
+          "stddev_ns": 1394967,
+          "avg_ts": 10.44175,
+          "stddev_ts": 0.000289,
+          "samples_ns": [
+            49035476085,
+            49033357785,
+            49032941060
+          ],
+          "samples_ts": [
+            10.4414,
+            10.4419,
+            10.442
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:51:01Z",
+          "avg_ns": 28320462605,
+          "stddev_ns": 10911613,
+          "avg_ts": 4.519701,
+          "stddev_ts": 0.001741,
+          "samples_ns": [
+            28329616565,
+            28323381542,
+            28308389709
+          ],
+          "samples_ts": [
+            4.51824,
+            4.51923,
+            4.52163
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 922
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:01:30.607021+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:52:28Z\",\n    \"avg_ns\": 49045981019,\n    \"stddev_ns\": 4093360,\n    \"avg_ts\": 10.439184,\n    \"stddev_ts\": 0.000871,\n    \"samples_ns\": [ 49050698662, 49043370243, 49043874152 ],\n    \"samples_ts\": [ 10.4382, 10.4397, 10.4396 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-09T23:55:45Z\",\n    \"avg_ns\": 115036487136,\n    \"stddev_ns\": 41351525,\n    \"avg_ts\": 4.450762,\n    \"stddev_ts\": 0.001600,\n    \"samples_ns\": [ 115025145504, 115001991310, 115082324595 ],\n    \"samples_ts\": [ 4.4512, 4.4521, 4.44899 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:52:28Z",
+          "avg_ns": 49045981019,
+          "stddev_ns": 4093360,
+          "avg_ts": 10.439184,
+          "stddev_ts": 0.000871,
+          "samples_ns": [
+            49050698662,
+            49043370243,
+            49043874152
+          ],
+          "samples_ts": [
+            10.4382,
+            10.4397,
+            10.4396
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-09T23:55:45Z",
+          "avg_ns": 115036487136,
+          "stddev_ns": 41351525,
+          "avg_ts": 4.450762,
+          "stddev_ts": 0.0016,
+          "samples_ns": [
+            115025145504,
+            115001991310,
+            115082324595
+          ],
+          "samples_ts": [
+            4.4512,
+            4.4521,
+            4.44899
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 923
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:03:46.428713+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:01:32Z\",\n    \"avg_ns\": 11999442018,\n    \"stddev_ns\": 6111238,\n    \"avg_ts\": 10.667165,\n    \"stddev_ts\": 0.005430,\n    \"samples_ns\": [ 12006482345, 11996322586, 11995521124 ],\n    \"samples_ts\": [ 10.6609, 10.6699, 10.6706 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:02:20Z\",\n    \"avg_ns\": 28435486723,\n    \"stddev_ns\": 4054607,\n    \"avg_ts\": 4.501418,\n    \"stddev_ts\": 0.000641,\n    \"samples_ns\": [ 28439367805, 28431290994, 28435801372 ],\n    \"samples_ts\": [ 4.5008, 4.50208, 4.50137 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:01:32Z",
+          "avg_ns": 11999442018,
+          "stddev_ns": 6111238,
+          "avg_ts": 10.667165,
+          "stddev_ts": 0.00543,
+          "samples_ns": [
+            12006482345,
+            11996322586,
+            11995521124
+          ],
+          "samples_ts": [
+            10.6609,
+            10.6699,
+            10.6706
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:02:20Z",
+          "avg_ns": 28435486723,
+          "stddev_ns": 4054607,
+          "avg_ts": 4.501418,
+          "stddev_ts": 0.000641,
+          "samples_ns": [
+            28439367805,
+            28431290994,
+            28435801372
+          ],
+          "samples_ts": [
+            4.5008,
+            4.50208,
+            4.50137
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 924
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:10:21.260831+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:03:48Z\",\n    \"avg_ns\": 12001118051,\n    \"stddev_ns\": 2232229,\n    \"avg_ts\": 10.665673,\n    \"stddev_ts\": 0.001979,\n    \"samples_ns\": [ 12003328108, 11998874787, 12001151260 ],\n    \"samples_ts\": [ 10.6637, 10.6677, 10.6656 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:04:36Z\",\n    \"avg_ns\": 114770574511,\n    \"stddev_ns\": 2100250,\n    \"avg_ts\": 4.461074,\n    \"stddev_ts\": 0.000081,\n    \"samples_ns\": [ 114769015159, 114772926637, 114769781738 ],\n    \"samples_ts\": [ 4.46113, 4.46098, 4.4611 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:03:48Z",
+          "avg_ns": 12001118051,
+          "stddev_ns": 2232229,
+          "avg_ts": 10.665673,
+          "stddev_ts": 0.001979,
+          "samples_ns": [
+            12003328108,
+            11998874787,
+            12001151260
+          ],
+          "samples_ts": [
+            10.6637,
+            10.6677,
+            10.6656
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:04:36Z",
+          "avg_ns": 114770574511,
+          "stddev_ns": 2100250,
+          "avg_ts": 4.461074,
+          "stddev_ts": 8.1e-05,
+          "samples_ns": [
+            114769015159,
+            114772926637,
+            114769781738
+          ],
+          "samples_ts": [
+            4.46113,
+            4.46098,
+            4.4611
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 925
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:15:01.976568+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:10:23Z\",\n    \"avg_ns\": 48262491230,\n    \"stddev_ns\": 1001117,\n    \"avg_ts\": 10.608653,\n    \"stddev_ts\": 0.000215,\n    \"samples_ns\": [ 48263064342, 48263045883, 48261363466 ],\n    \"samples_ts\": [ 10.6085, 10.6085, 10.6089 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:13:36Z\",\n    \"avg_ns\": 28365800742,\n    \"stddev_ns\": 15110078,\n    \"avg_ts\": 4.512477,\n    \"stddev_ts\": 0.002403,\n    \"samples_ns\": [ 28382906864, 28354277569, 28360217794 ],\n    \"samples_ts\": [ 4.50976, 4.51431, 4.51336 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:10:23Z",
+          "avg_ns": 48262491230,
+          "stddev_ns": 1001117,
+          "avg_ts": 10.608653,
+          "stddev_ts": 0.000215,
+          "samples_ns": [
+            48263064342,
+            48263045883,
+            48261363466
+          ],
+          "samples_ts": [
+            10.6085,
+            10.6085,
+            10.6089
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:13:36Z",
+          "avg_ns": 28365800742,
+          "stddev_ns": 15110078,
+          "avg_ts": 4.512477,
+          "stddev_ts": 0.002403,
+          "samples_ns": [
+            28382906864,
+            28354277569,
+            28360217794
+          ],
+          "samples_ts": [
+            4.50976,
+            4.51431,
+            4.51336
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 926
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:24:03.540000+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:15:04Z\",\n    \"avg_ns\": 48260162037,\n    \"stddev_ns\": 757413,\n    \"avg_ts\": 10.609165,\n    \"stddev_ts\": 0.000167,\n    \"samples_ns\": [ 48261036472, 48259710741, 48259738898 ],\n    \"samples_ts\": [ 10.609, 10.6093, 10.6093 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:18:17Z\",\n    \"avg_ns\": 115327379158,\n    \"stddev_ns\": 17204323,\n    \"avg_ts\": 4.439536,\n    \"stddev_ts\": 0.000662,\n    \"samples_ns\": [ 115346655490, 115321901041, 115313580943 ],\n    \"samples_ts\": [ 4.43879, 4.43975, 4.44007 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:15:04Z",
+          "avg_ns": 48260162037,
+          "stddev_ns": 757413,
+          "avg_ts": 10.609165,
+          "stddev_ts": 0.000167,
+          "samples_ns": [
+            48261036472,
+            48259710741,
+            48259738898
+          ],
+          "samples_ts": [
+            10.609,
+            10.6093,
+            10.6093
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:18:17Z",
+          "avg_ns": 115327379158,
+          "stddev_ns": 17204323,
+          "avg_ts": 4.439536,
+          "stddev_ts": 0.000662,
+          "samples_ns": [
+            115346655490,
+            115321901041,
+            115313580943
+          ],
+          "samples_ts": [
+            4.43879,
+            4.43975,
+            4.44007
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 927
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:26:20.131796+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:24:05Z\",\n    \"avg_ns\": 12218117511,\n    \"stddev_ns\": 18932860,\n    \"avg_ts\": 10.476262,\n    \"stddev_ts\": 0.016220,\n    \"samples_ns\": [ 12205281756, 12239861371, 12209209406 ],\n    \"samples_ts\": [ 10.4873, 10.4576, 10.4839 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:24:54Z\",\n    \"avg_ns\": 28410610216,\n    \"stddev_ns\": 106673635,\n    \"avg_ts\": 4.505401,\n    \"stddev_ts\": 0.016881,\n    \"samples_ns\": [ 28533209503, 28359620635, 28339000511 ],\n    \"samples_ts\": [ 4.486, 4.51346, 4.51674 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:24:05Z",
+          "avg_ns": 12218117511,
+          "stddev_ns": 18932860,
+          "avg_ts": 10.476262,
+          "stddev_ts": 0.01622,
+          "samples_ns": [
+            12205281756,
+            12239861371,
+            12209209406
+          ],
+          "samples_ts": [
+            10.4873,
+            10.4576,
+            10.4839
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:24:54Z",
+          "avg_ns": 28410610216,
+          "stddev_ns": 106673635,
+          "avg_ts": 4.505401,
+          "stddev_ts": 0.016881,
+          "samples_ns": [
+            28533209503,
+            28359620635,
+            28339000511
+          ],
+          "samples_ts": [
+            4.486,
+            4.51346,
+            4.51674
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 928
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:32:54.154388+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:26:22Z\",\n    \"avg_ns\": 11998031483,\n    \"stddev_ns\": 14812309,\n    \"avg_ts\": 10.668428,\n    \"stddev_ts\": 0.013161,\n    \"samples_ns\": [ 11989682402, 12015133210, 11989278838 ],\n    \"samples_ts\": [ 10.6758, 10.6532, 10.6762 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:27:10Z\",\n    \"avg_ns\": 114527603968,\n    \"stddev_ns\": 23808188,\n    \"avg_ts\": 4.470538,\n    \"stddev_ts\": 0.000929,\n    \"samples_ns\": [ 114554471208, 114509126203, 114519214493 ],\n    \"samples_ts\": [ 4.46949, 4.47126, 4.47087 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:26:22Z",
+          "avg_ns": 11998031483,
+          "stddev_ns": 14812309,
+          "avg_ts": 10.668428,
+          "stddev_ts": 0.013161,
+          "samples_ns": [
+            11989682402,
+            12015133210,
+            11989278838
+          ],
+          "samples_ts": [
+            10.6758,
+            10.6532,
+            10.6762
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:27:10Z",
+          "avg_ns": 114527603968,
+          "stddev_ns": 23808188,
+          "avg_ts": 4.470538,
+          "stddev_ts": 0.000929,
+          "samples_ns": [
+            114554471208,
+            114509126203,
+            114519214493
+          ],
+          "samples_ts": [
+            4.46949,
+            4.47126,
+            4.47087
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 929
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:37:35.711816+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:32:56Z\",\n    \"avg_ns\": 48352131372,\n    \"stddev_ns\": 3980170,\n    \"avg_ts\": 10.588985,\n    \"stddev_ts\": 0.000870,\n    \"samples_ns\": [ 48356360793, 48351558545, 48348474779 ],\n    \"samples_ts\": [ 10.5881, 10.5891, 10.5898 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:36:10Z\",\n    \"avg_ns\": 28385723440,\n    \"stddev_ns\": 18363851,\n    \"avg_ts\": 4.509310,\n    \"stddev_ts\": 0.002916,\n    \"samples_ns\": [ 28406896526, 28376111384, 28374162412 ],\n    \"samples_ts\": [ 4.50595, 4.51084, 4.51115 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:32:56Z",
+          "avg_ns": 48352131372,
+          "stddev_ns": 3980170,
+          "avg_ts": 10.588985,
+          "stddev_ts": 0.00087,
+          "samples_ns": [
+            48356360793,
+            48351558545,
+            48348474779
+          ],
+          "samples_ts": [
+            10.5881,
+            10.5891,
+            10.5898
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:36:10Z",
+          "avg_ns": 28385723440,
+          "stddev_ns": 18363851,
+          "avg_ts": 4.50931,
+          "stddev_ts": 0.002916,
+          "samples_ns": [
+            28406896526,
+            28376111384,
+            28374162412
+          ],
+          "samples_ts": [
+            4.50595,
+            4.51084,
+            4.51115
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 930
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:46:40.310540+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:37:37Z\",\n    \"avg_ns\": 49274862630,\n    \"stddev_ns\": 3176562,\n    \"avg_ts\": 10.390694,\n    \"stddev_ts\": 0.000668,\n    \"samples_ns\": [ 49272482733, 49273645643, 49278459515 ],\n    \"samples_ts\": [ 10.3912, 10.391, 10.3899 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:40:54Z\",\n    \"avg_ns\": 115007628479,\n    \"stddev_ns\": 694067644,\n    \"avg_ts\": 4.451986,\n    \"stddev_ts\": 0.026774,\n    \"samples_ns\": [ 115809067874, 114607884930, 114605932634 ],\n    \"samples_ts\": [ 4.42107, 4.46741, 4.46748 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:37:37Z",
+          "avg_ns": 49274862630,
+          "stddev_ns": 3176562,
+          "avg_ts": 10.390694,
+          "stddev_ts": 0.000668,
+          "samples_ns": [
+            49272482733,
+            49273645643,
+            49278459515
+          ],
+          "samples_ts": [
+            10.3912,
+            10.391,
+            10.3899
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:40:54Z",
+          "avg_ns": 115007628479,
+          "stddev_ns": 694067644,
+          "avg_ts": 4.451986,
+          "stddev_ts": 0.026774,
+          "samples_ns": [
+            115809067874,
+            114607884930,
+            114605932634
+          ],
+          "samples_ts": [
+            4.42107,
+            4.46741,
+            4.46748
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 931
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:48:56.130583+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:46:42Z\",\n    \"avg_ns\": 12011532806,\n    \"stddev_ns\": 15182408,\n    \"avg_ts\": 10.656436,\n    \"stddev_ts\": 0.013469,\n    \"samples_ns\": [ 12026828921, 12011302779, 11996466718 ],\n    \"samples_ts\": [ 10.6429, 10.6566, 10.6698 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:47:30Z\",\n    \"avg_ns\": 28447613201,\n    \"stddev_ns\": 4522437,\n    \"avg_ts\": 4.499499,\n    \"stddev_ts\": 0.000715,\n    \"samples_ns\": [ 28450633917, 28449791867, 28442413819 ],\n    \"samples_ts\": [ 4.49902, 4.49915, 4.50032 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:46:42Z",
+          "avg_ns": 12011532806,
+          "stddev_ns": 15182408,
+          "avg_ts": 10.656436,
+          "stddev_ts": 0.013469,
+          "samples_ns": [
+            12026828921,
+            12011302779,
+            11996466718
+          ],
+          "samples_ts": [
+            10.6429,
+            10.6566,
+            10.6698
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:47:30Z",
+          "avg_ns": 28447613201,
+          "stddev_ns": 4522437,
+          "avg_ts": 4.499499,
+          "stddev_ts": 0.000715,
+          "samples_ns": [
+            28450633917,
+            28449791867,
+            28442413819
+          ],
+          "samples_ts": [
+            4.49902,
+            4.49915,
+            4.50032
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 932
+    },
+    {
+      "timestamp_utc": "2025-12-10T00:55:30.780974+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:48:58Z\",\n    \"avg_ns\": 12010105292,\n    \"stddev_ns\": 14984988,\n    \"avg_ts\": 10.657703,\n    \"stddev_ts\": 0.013289,\n    \"samples_ns\": [ 12027157284, 12004118598, 11999039996 ],\n    \"samples_ts\": [ 10.6426, 10.663, 10.6675 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:49:46Z\",\n    \"avg_ns\": 114721011851,\n    \"stddev_ns\": 12936237,\n    \"avg_ts\": 4.463001,\n    \"stddev_ts\": 0.000503,\n    \"samples_ns\": [ 114735487211, 114710599352, 114716948991 ],\n    \"samples_ts\": [ 4.46244, 4.46341, 4.46316 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:48:58Z",
+          "avg_ns": 12010105292,
+          "stddev_ns": 14984988,
+          "avg_ts": 10.657703,
+          "stddev_ts": 0.013289,
+          "samples_ns": [
+            12027157284,
+            12004118598,
+            11999039996
+          ],
+          "samples_ts": [
+            10.6426,
+            10.663,
+            10.6675
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:49:46Z",
+          "avg_ns": 114721011851,
+          "stddev_ns": 12936237,
+          "avg_ts": 4.463001,
+          "stddev_ts": 0.000503,
+          "samples_ns": [
+            114735487211,
+            114710599352,
+            114716948991
+          ],
+          "samples_ts": [
+            4.46244,
+            4.46341,
+            4.46316
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 933
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:00:14.647885+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:55:32Z\",\n    \"avg_ns\": 49039966200,\n    \"stddev_ns\": 5258131,\n    \"avg_ts\": 10.440464,\n    \"stddev_ts\": 0.001118,\n    \"samples_ns\": [ 49034339185, 49044742215, 49040817201 ],\n    \"samples_ts\": [ 10.4417, 10.4394, 10.4403 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T00:58:49Z\",\n    \"avg_ns\": 28406838404,\n    \"stddev_ns\": 14357919,\n    \"avg_ts\": 4.505958,\n    \"stddev_ts\": 0.002277,\n    \"samples_ns\": [ 28423315658, 28397019735, 28400179820 ],\n    \"samples_ts\": [ 4.50335, 4.50752, 4.50701 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:55:32Z",
+          "avg_ns": 49039966200,
+          "stddev_ns": 5258131,
+          "avg_ts": 10.440464,
+          "stddev_ts": 0.001118,
+          "samples_ns": [
+            49034339185,
+            49044742215,
+            49040817201
+          ],
+          "samples_ts": [
+            10.4417,
+            10.4394,
+            10.4403
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T00:58:49Z",
+          "avg_ns": 28406838404,
+          "stddev_ns": 14357919,
+          "avg_ts": 4.505958,
+          "stddev_ts": 0.002277,
+          "samples_ns": [
+            28423315658,
+            28397019735,
+            28400179820
+          ],
+          "samples_ts": [
+            4.50335,
+            4.50752,
+            4.50701
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 934
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:09:19.231688+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:00:16Z\",\n    \"avg_ns\": 49050561476,\n    \"stddev_ns\": 5438776,\n    \"avg_ts\": 10.438209,\n    \"stddev_ts\": 0.001156,\n    \"samples_ns\": [ 49056741340, 49048413992, 49046529097 ],\n    \"samples_ts\": [ 10.4369, 10.4387, 10.4391 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:03:32Z\",\n    \"avg_ns\": 115296800862,\n    \"stddev_ns\": 33551977,\n    \"avg_ts\": 4.440713,\n    \"stddev_ts\": 0.001292,\n    \"samples_ns\": [ 115335152980, 115282351425, 115272898183 ],\n    \"samples_ts\": [ 4.43924, 4.44127, 4.44163 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:00:16Z",
+          "avg_ns": 49050561476,
+          "stddev_ns": 5438776,
+          "avg_ts": 10.438209,
+          "stddev_ts": 0.001156,
+          "samples_ns": [
+            49056741340,
+            49048413992,
+            49046529097
+          ],
+          "samples_ts": [
+            10.4369,
+            10.4387,
+            10.4391
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:03:32Z",
+          "avg_ns": 115296800862,
+          "stddev_ns": 33551977,
+          "avg_ts": 4.440713,
+          "stddev_ts": 0.001292,
+          "samples_ns": [
+            115335152980,
+            115282351425,
+            115272898183
+          ],
+          "samples_ts": [
+            4.43924,
+            4.44127,
+            4.44163
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 935
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:11:16.209158+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:09:21Z\",\n    \"avg_ns\": 8243615334,\n    \"stddev_ns\": 7642664,\n    \"avg_ts\": 15.527177,\n    \"stddev_ts\": 0.014388,\n    \"samples_ns\": [ 8252439936, 8239131472, 8239274594 ],\n    \"samples_ts\": [ 15.5106, 15.5356, 15.5353 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:09:54Z\",\n    \"avg_ns\": 27146194782,\n    \"stddev_ns\": 16991014,\n    \"avg_ts\": 4.715211,\n    \"stddev_ts\": 0.002950,\n    \"samples_ns\": [ 27165677290, 27138458141, 27134448915 ],\n    \"samples_ts\": [ 4.71183, 4.71655, 4.71725 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:09:21Z",
+          "avg_ns": 8243615334,
+          "stddev_ns": 7642664,
+          "avg_ts": 15.527177,
+          "stddev_ts": 0.014388,
+          "samples_ns": [
+            8252439936,
+            8239131472,
+            8239274594
+          ],
+          "samples_ts": [
+            15.5106,
+            15.5356,
+            15.5353
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:09:54Z",
+          "avg_ns": 27146194782,
+          "stddev_ns": 16991014,
+          "avg_ts": 4.715211,
+          "stddev_ts": 0.00295,
+          "samples_ns": [
+            27165677290,
+            27138458141,
+            27134448915
+          ],
+          "samples_ts": [
+            4.71183,
+            4.71655,
+            4.71725
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 936
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:17:20.312341+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:11:18Z\",\n    \"avg_ns\": 8255094540,\n    \"stddev_ns\": 8929131,\n    \"avg_ts\": 15.505589,\n    \"stddev_ts\": 0.016772,\n    \"samples_ns\": [ 8255539281, 8263792528, 8245951812 ],\n    \"samples_ts\": [ 15.5047, 15.4893, 15.5228 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:11:51Z\",\n    \"avg_ns\": 109516225646,\n    \"stddev_ns\": 52668053,\n    \"avg_ts\": 4.675107,\n    \"stddev_ts\": 0.002248,\n    \"samples_ns\": [ 109575814794, 109475911353, 109496950792 ],\n    \"samples_ts\": [ 4.67256, 4.67683, 4.67593 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:11:18Z",
+          "avg_ns": 8255094540,
+          "stddev_ns": 8929131,
+          "avg_ts": 15.505589,
+          "stddev_ts": 0.016772,
+          "samples_ns": [
+            8255539281,
+            8263792528,
+            8245951812
+          ],
+          "samples_ts": [
+            15.5047,
+            15.4893,
+            15.5228
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:11:51Z",
+          "avg_ns": 109516225646,
+          "stddev_ns": 52668053,
+          "avg_ts": 4.675107,
+          "stddev_ts": 0.002248,
+          "samples_ns": [
+            109575814794,
+            109475911353,
+            109496950792
+          ],
+          "samples_ts": [
+            4.67256,
+            4.67683,
+            4.67593
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 937
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:20:58.064283+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:17:22Z\",\n    \"avg_ns\": 33153884396,\n    \"stddev_ns\": 3894125,\n    \"avg_ts\": 15.443138,\n    \"stddev_ts\": 0.001810,\n    \"samples_ns\": [ 33157647942, 33149887276, 33154117972 ],\n    \"samples_ts\": [ 15.4414, 15.445, 15.443 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:19:35Z\",\n    \"avg_ns\": 27492852205,\n    \"stddev_ns\": 387878449,\n    \"avg_ts\": 4.656378,\n    \"stddev_ts\": 0.066149,\n    \"samples_ns\": [ 27053041303, 27639447070, 27786068243 ],\n    \"samples_ts\": [ 4.73145, 4.63106, 4.60663 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:17:22Z",
+          "avg_ns": 33153884396,
+          "stddev_ns": 3894125,
+          "avg_ts": 15.443138,
+          "stddev_ts": 0.00181,
+          "samples_ns": [
+            33157647942,
+            33149887276,
+            33154117972
+          ],
+          "samples_ts": [
+            15.4414,
+            15.445,
+            15.443
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:19:35Z",
+          "avg_ns": 27492852205,
+          "stddev_ns": 387878449,
+          "avg_ts": 4.656378,
+          "stddev_ts": 0.066149,
+          "samples_ns": [
+            27053041303,
+            27639447070,
+            27786068243
+          ],
+          "samples_ts": [
+            4.73145,
+            4.63106,
+            4.60663
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 938
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:28:49.830655+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:21:00Z\",\n    \"avg_ns\": 33163251684,\n    \"stddev_ns\": 6146620,\n    \"avg_ts\": 15.438776,\n    \"stddev_ts\": 0.002859,\n    \"samples_ns\": [ 33159474839, 33159942397, 33170337818 ],\n    \"samples_ts\": [ 15.4405, 15.4403, 15.4355 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:23:12Z\",\n    \"avg_ns\": 112189341870,\n    \"stddev_ns\": 721237894,\n    \"avg_ts\": 4.563839,\n    \"stddev_ts\": 0.029242,\n    \"samples_ns\": [ 113012676987, 111669169285, 111886179340 ],\n    \"samples_ts\": [ 4.53047, 4.58497, 4.57608 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:21:00Z",
+          "avg_ns": 33163251684,
+          "stddev_ns": 6146620,
+          "avg_ts": 15.438776,
+          "stddev_ts": 0.002859,
+          "samples_ns": [
+            33159474839,
+            33159942397,
+            33170337818
+          ],
+          "samples_ts": [
+            15.4405,
+            15.4403,
+            15.4355
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:23:12Z",
+          "avg_ns": 112189341870,
+          "stddev_ns": 721237894,
+          "avg_ts": 4.563839,
+          "stddev_ts": 0.029242,
+          "samples_ns": [
+            113012676987,
+            111669169285,
+            111886179340
+          ],
+          "samples_ts": [
+            4.53047,
+            4.58497,
+            4.57608
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 939
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:30:47.290095+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:28:51Z\",\n    \"avg_ns\": 8262937148,\n    \"stddev_ns\": 28676176,\n    \"avg_ts\": 15.490984,\n    \"stddev_ts\": 0.053663,\n    \"samples_ns\": [ 8295726203, 8250540310, 8242544931 ],\n    \"samples_ts\": [ 15.4296, 15.5141, 15.5292 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:29:24Z\",\n    \"avg_ns\": 27323723218,\n    \"stddev_ns\": 128571583,\n    \"avg_ts\": 4.684643,\n    \"stddev_ts\": 0.022011,\n    \"samples_ns\": [ 27211859836, 27464186178, 27295123642 ],\n    \"samples_ts\": [ 4.70383, 4.66062, 4.68948 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:28:51Z",
+          "avg_ns": 8262937148,
+          "stddev_ns": 28676176,
+          "avg_ts": 15.490984,
+          "stddev_ts": 0.053663,
+          "samples_ns": [
+            8295726203,
+            8250540310,
+            8242544931
+          ],
+          "samples_ts": [
+            15.4296,
+            15.5141,
+            15.5292
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:29:24Z",
+          "avg_ns": 27323723218,
+          "stddev_ns": 128571583,
+          "avg_ts": 4.684643,
+          "stddev_ts": 0.022011,
+          "samples_ns": [
+            27211859836,
+            27464186178,
+            27295123642
+          ],
+          "samples_ts": [
+            4.70383,
+            4.66062,
+            4.68948
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 940
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:36:54.051717+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:30:49Z\",\n    \"avg_ns\": 8259212200,\n    \"stddev_ns\": 19371662,\n    \"avg_ts\": 15.497903,\n    \"stddev_ts\": 0.036304,\n    \"samples_ns\": [ 8245844393, 8250364278, 8281427929 ],\n    \"samples_ts\": [ 15.523, 15.5145, 15.4563 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:31:22Z\",\n    \"avg_ns\": 110420338630,\n    \"stddev_ns\": 745397014,\n    \"avg_ts\": 4.636967,\n    \"stddev_ts\": 0.031213,\n    \"samples_ns\": [ 109819192911, 111254376181, 110187446798 ],\n    \"samples_ts\": [ 4.66221, 4.60207, 4.64663 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:30:49Z",
+          "avg_ns": 8259212200,
+          "stddev_ns": 19371662,
+          "avg_ts": 15.497903,
+          "stddev_ts": 0.036304,
+          "samples_ns": [
+            8245844393,
+            8250364278,
+            8281427929
+          ],
+          "samples_ts": [
+            15.523,
+            15.5145,
+            15.4563
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:31:22Z",
+          "avg_ns": 110420338630,
+          "stddev_ns": 745397014,
+          "avg_ts": 4.636967,
+          "stddev_ts": 0.031213,
+          "samples_ns": [
+            109819192911,
+            111254376181,
+            110187446798
+          ],
+          "samples_ts": [
+            4.66221,
+            4.60207,
+            4.64663
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 941
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:40:31.036476+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:36:56Z\",\n    \"avg_ns\": 33303277796,\n    \"stddev_ns\": 3050453,\n    \"avg_ts\": 15.373862,\n    \"stddev_ts\": 0.001408,\n    \"samples_ns\": [ 33306784324, 33301813432, 33301235632 ],\n    \"samples_ts\": [ 15.3722, 15.3745, 15.3748 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:39:09Z\",\n    \"avg_ns\": 27079333322,\n    \"stddev_ns\": 13233351,\n    \"avg_ts\": 4.726853,\n    \"stddev_ts\": 0.002309,\n    \"samples_ns\": [ 27094492166, 27073404521, 27070103281 ],\n    \"samples_ts\": [ 4.72421, 4.72789, 4.72846 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:36:56Z",
+          "avg_ns": 33303277796,
+          "stddev_ns": 3050453,
+          "avg_ts": 15.373862,
+          "stddev_ts": 0.001408,
+          "samples_ns": [
+            33306784324,
+            33301813432,
+            33301235632
+          ],
+          "samples_ts": [
+            15.3722,
+            15.3745,
+            15.3748
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:39:09Z",
+          "avg_ns": 27079333322,
+          "stddev_ns": 13233351,
+          "avg_ts": 4.726853,
+          "stddev_ts": 0.002309,
+          "samples_ns": [
+            27094492166,
+            27073404521,
+            27070103281
+          ],
+          "samples_ts": [
+            4.72421,
+            4.72789,
+            4.72846
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 942
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:48:14.630922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:40:33Z\",\n    \"avg_ns\": 33298117708,\n    \"stddev_ns\": 5715486,\n    \"avg_ts\": 15.376245,\n    \"stddev_ts\": 0.002638,\n    \"samples_ns\": [ 33298486530, 33303636934, 33292229661 ],\n    \"samples_ts\": [ 15.3761, 15.3737, 15.379 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:42:46Z\",\n    \"avg_ns\": 109294971758,\n    \"stddev_ns\": 11497823,\n    \"avg_ts\": 4.684571,\n    \"stddev_ts\": 0.000493,\n    \"samples_ns\": [ 109296438216, 109282815843, 109305661216 ],\n    \"samples_ts\": [ 4.68451, 4.68509, 4.68411 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:40:33Z",
+          "avg_ns": 33298117708,
+          "stddev_ns": 5715486,
+          "avg_ts": 15.376245,
+          "stddev_ts": 0.002638,
+          "samples_ns": [
+            33298486530,
+            33303636934,
+            33292229661
+          ],
+          "samples_ts": [
+            15.3761,
+            15.3737,
+            15.379
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:42:46Z",
+          "avg_ns": 109294971758,
+          "stddev_ns": 11497823,
+          "avg_ts": 4.684571,
+          "stddev_ts": 0.000493,
+          "samples_ns": [
+            109296438216,
+            109282815843,
+            109305661216
+          ],
+          "samples_ts": [
+            4.68451,
+            4.68509,
+            4.68411
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 943
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:50:11.831524+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:48:16Z\",\n    \"avg_ns\": 8252050068,\n    \"stddev_ns\": 14033112,\n    \"avg_ts\": 15.511327,\n    \"stddev_ts\": 0.026352,\n    \"samples_ns\": [ 8243540709, 8244362329, 8268247166 ],\n    \"samples_ts\": [ 15.5273, 15.5258, 15.4809 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:48:49Z\",\n    \"avg_ns\": 27236544576,\n    \"stddev_ns\": 353445918,\n    \"avg_ts\": 4.700092,\n    \"stddev_ts\": 0.060555,\n    \"samples_ns\": [ 27002343708, 27064186062, 27643103958 ],\n    \"samples_ts\": [ 4.74033, 4.7295, 4.63045 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:48:16Z",
+          "avg_ns": 8252050068,
+          "stddev_ns": 14033112,
+          "avg_ts": 15.511327,
+          "stddev_ts": 0.026352,
+          "samples_ns": [
+            8243540709,
+            8244362329,
+            8268247166
+          ],
+          "samples_ts": [
+            15.5273,
+            15.5258,
+            15.4809
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:48:49Z",
+          "avg_ns": 27236544576,
+          "stddev_ns": 353445918,
+          "avg_ts": 4.700092,
+          "stddev_ts": 0.060555,
+          "samples_ns": [
+            27002343708,
+            27064186062,
+            27643103958
+          ],
+          "samples_ts": [
+            4.74033,
+            4.7295,
+            4.63045
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 944
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:56:19.088473+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:50:14Z\",\n    \"avg_ns\": 8264145449,\n    \"stddev_ns\": 9803850,\n    \"avg_ts\": 15.488609,\n    \"stddev_ts\": 0.018381,\n    \"samples_ns\": [ 8272767628, 8266187234, 8253481485 ],\n    \"samples_ts\": [ 15.4725, 15.4848, 15.5086 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:50:47Z\",\n    \"avg_ns\": 110545465940,\n    \"stddev_ns\": 1035425923,\n    \"avg_ts\": 4.631848,\n    \"stddev_ts\": 0.043160,\n    \"samples_ns\": [ 109852868725, 111735765997, 110047763100 ],\n    \"samples_ts\": [ 4.66078, 4.58224, 4.65253 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:50:14Z",
+          "avg_ns": 8264145449,
+          "stddev_ns": 9803850,
+          "avg_ts": 15.488609,
+          "stddev_ts": 0.018381,
+          "samples_ns": [
+            8272767628,
+            8266187234,
+            8253481485
+          ],
+          "samples_ts": [
+            15.4725,
+            15.4848,
+            15.5086
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:50:47Z",
+          "avg_ns": 110545465940,
+          "stddev_ns": 1035425923,
+          "avg_ts": 4.631848,
+          "stddev_ts": 0.04316,
+          "samples_ns": [
+            109852868725,
+            111735765997,
+            110047763100
+          ],
+          "samples_ts": [
+            4.66078,
+            4.58224,
+            4.65253
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 945
+    },
+    {
+      "timestamp_utc": "2025-12-10T01:59:58.156835+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:56:21Z\",\n    \"avg_ns\": 33814408636,\n    \"stddev_ns\": 3203970,\n    \"avg_ts\": 15.141474,\n    \"stddev_ts\": 0.001430,\n    \"samples_ns\": [ 33818067400, 33812181906, 33812976604 ],\n    \"samples_ts\": [ 15.1398, 15.1425, 15.1421 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T01:58:36Z\",\n    \"avg_ns\": 27118900458,\n    \"stddev_ns\": 132756909,\n    \"avg_ts\": 4.720031,\n    \"stddev_ts\": 0.023052,\n    \"samples_ns\": [ 27017819212, 27069634739, 27269247424 ],\n    \"samples_ts\": [ 4.73761, 4.72855, 4.69393 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:56:21Z",
+          "avg_ns": 33814408636,
+          "stddev_ns": 3203970,
+          "avg_ts": 15.141474,
+          "stddev_ts": 0.00143,
+          "samples_ns": [
+            33818067400,
+            33812181906,
+            33812976604
+          ],
+          "samples_ts": [
+            15.1398,
+            15.1425,
+            15.1421
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T01:58:36Z",
+          "avg_ns": 27118900458,
+          "stddev_ns": 132756909,
+          "avg_ts": 4.720031,
+          "stddev_ts": 0.023052,
+          "samples_ns": [
+            27017819212,
+            27069634739,
+            27269247424
+          ],
+          "samples_ts": [
+            4.73761,
+            4.72855,
+            4.69393
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 946
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:07:45.510825+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:00:00Z\",\n    \"avg_ns\": 33807885633,\n    \"stddev_ns\": 5107402,\n    \"avg_ts\": 15.144396,\n    \"stddev_ts\": 0.002288,\n    \"samples_ns\": [ 33811504757, 33802043452, 33810108690 ],\n    \"samples_ts\": [ 15.1428, 15.147, 15.1434 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:02:15Z\",\n    \"avg_ns\": 109868351442,\n    \"stddev_ns\": 278297226,\n    \"avg_ts\": 4.660143,\n    \"stddev_ts\": 0.011787,\n    \"samples_ns\": [ 109688475739, 109727675364, 110188903224 ],\n    \"samples_ts\": [ 4.66776, 4.6661, 4.64657 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:00:00Z",
+          "avg_ns": 33807885633,
+          "stddev_ns": 5107402,
+          "avg_ts": 15.144396,
+          "stddev_ts": 0.002288,
+          "samples_ns": [
+            33811504757,
+            33802043452,
+            33810108690
+          ],
+          "samples_ts": [
+            15.1428,
+            15.147,
+            15.1434
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:02:15Z",
+          "avg_ns": 109868351442,
+          "stddev_ns": 278297226,
+          "avg_ts": 4.660143,
+          "stddev_ts": 0.011787,
+          "samples_ns": [
+            109688475739,
+            109727675364,
+            110188903224
+          ],
+          "samples_ts": [
+            4.66776,
+            4.6661,
+            4.64657
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 947
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:09:42.244826+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:07:47Z\",\n    \"avg_ns\": 8262944038,\n    \"stddev_ns\": 8833927,\n    \"avg_ts\": 15.490859,\n    \"stddev_ts\": 0.016551,\n    \"samples_ns\": [ 8273142966, 8257687488, 8258001660 ],\n    \"samples_ts\": [ 15.4718, 15.5007, 15.5001 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:08:20Z\",\n    \"avg_ns\": 27066918663,\n    \"stddev_ns\": 11105487,\n    \"avg_ts\": 4.729021,\n    \"stddev_ts\": 0.001939,\n    \"samples_ns\": [ 27079604773, 27058971025, 27062180193 ],\n    \"samples_ts\": [ 4.7268, 4.73041, 4.72985 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:07:47Z",
+          "avg_ns": 8262944038,
+          "stddev_ns": 8833927,
+          "avg_ts": 15.490859,
+          "stddev_ts": 0.016551,
+          "samples_ns": [
+            8273142966,
+            8257687488,
+            8258001660
+          ],
+          "samples_ts": [
+            15.4718,
+            15.5007,
+            15.5001
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:08:20Z",
+          "avg_ns": 27066918663,
+          "stddev_ns": 11105487,
+          "avg_ts": 4.729021,
+          "stddev_ts": 0.001939,
+          "samples_ns": [
+            27079604773,
+            27058971025,
+            27062180193
+          ],
+          "samples_ts": [
+            4.7268,
+            4.73041,
+            4.72985
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 948
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:15:46.485055+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:09:44Z\",\n    \"avg_ns\": 8250914040,\n    \"stddev_ns\": 13210300,\n    \"avg_ts\": 15.513459,\n    \"stddev_ts\": 0.024857,\n    \"samples_ns\": [ 8260950799, 8235947814, 8255843507 ],\n    \"samples_ts\": [ 15.4946, 15.5416, 15.5042 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:10:17Z\",\n    \"avg_ns\": 109577507013,\n    \"stddev_ns\": 15258322,\n    \"avg_ts\": 4.672492,\n    \"stddev_ts\": 0.000650,\n    \"samples_ns\": [ 109573053430, 109564978419, 109594489192 ],\n    \"samples_ts\": [ 4.67268, 4.67303, 4.67177 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:09:44Z",
+          "avg_ns": 8250914040,
+          "stddev_ns": 13210300,
+          "avg_ts": 15.513459,
+          "stddev_ts": 0.024857,
+          "samples_ns": [
+            8260950799,
+            8235947814,
+            8255843507
+          ],
+          "samples_ts": [
+            15.4946,
+            15.5416,
+            15.5042
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:10:17Z",
+          "avg_ns": 109577507013,
+          "stddev_ns": 15258322,
+          "avg_ts": 4.672492,
+          "stddev_ts": 0.00065,
+          "samples_ns": [
+            109573053430,
+            109564978419,
+            109594489192
+          ],
+          "samples_ts": [
+            4.67268,
+            4.67303,
+            4.67177
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 949
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:19:22.814922+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:15:48Z\",\n    \"avg_ns\": 33137665831,\n    \"stddev_ns\": 39195241,\n    \"avg_ts\": 15.450710,\n    \"stddev_ts\": 0.018280,\n    \"samples_ns\": [ 33144076073, 33173260820, 33095660600 ],\n    \"samples_ts\": [ 15.4477, 15.4341, 15.4703 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:18:01Z\",\n    \"avg_ns\": 27067871704,\n    \"stddev_ns\": 22790324,\n    \"avg_ts\": 4.728856,\n    \"stddev_ts\": 0.003981,\n    \"samples_ns\": [ 27091398430, 27066316900, 27045899784 ],\n    \"samples_ts\": [ 4.72475, 4.72913, 4.7327 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:15:48Z",
+          "avg_ns": 33137665831,
+          "stddev_ns": 39195241,
+          "avg_ts": 15.45071,
+          "stddev_ts": 0.01828,
+          "samples_ns": [
+            33144076073,
+            33173260820,
+            33095660600
+          ],
+          "samples_ts": [
+            15.4477,
+            15.4341,
+            15.4703
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:18:01Z",
+          "avg_ns": 27067871704,
+          "stddev_ns": 22790324,
+          "avg_ts": 4.728856,
+          "stddev_ts": 0.003981,
+          "samples_ns": [
+            27091398430,
+            27066316900,
+            27045899784
+          ],
+          "samples_ts": [
+            4.72475,
+            4.72913,
+            4.7327
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 950
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:27:12.816452+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:19:24Z\",\n    \"avg_ns\": 33187540110,\n    \"stddev_ns\": 4457185,\n    \"avg_ts\": 15.427477,\n    \"stddev_ts\": 0.002069,\n    \"samples_ns\": [ 33191810993, 33187878493, 33182930846 ],\n    \"samples_ts\": [ 15.4255, 15.4273, 15.4296 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:21:37Z\",\n    \"avg_ns\": 111561671738,\n    \"stddev_ns\": 457813975,\n    \"avg_ts\": 4.589441,\n    \"stddev_ts\": 0.018868,\n    \"samples_ns\": [ 111047395519, 111712821631, 111924798065 ],\n    \"samples_ts\": [ 4.61064, 4.58318, 4.5745 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:19:24Z",
+          "avg_ns": 33187540110,
+          "stddev_ns": 4457185,
+          "avg_ts": 15.427477,
+          "stddev_ts": 0.002069,
+          "samples_ns": [
+            33191810993,
+            33187878493,
+            33182930846
+          ],
+          "samples_ts": [
+            15.4255,
+            15.4273,
+            15.4296
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:21:37Z",
+          "avg_ns": 111561671738,
+          "stddev_ns": 457813975,
+          "avg_ts": 4.589441,
+          "stddev_ts": 0.018868,
+          "samples_ns": [
+            111047395519,
+            111712821631,
+            111924798065
+          ],
+          "samples_ts": [
+            4.61064,
+            4.58318,
+            4.5745
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 951
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:29:09.693844+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:27:14Z\",\n    \"avg_ns\": 8258568855,\n    \"stddev_ns\": 11222604,\n    \"avg_ts\": 15.499073,\n    \"stddev_ts\": 0.021045,\n    \"samples_ns\": [ 8271519251, 8252496493, 8251690821 ],\n    \"samples_ts\": [ 15.4748, 15.5105, 15.512 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:27:47Z\",\n    \"avg_ns\": 27123223669,\n    \"stddev_ns\": 23201743,\n    \"avg_ts\": 4.719205,\n    \"stddev_ts\": 0.004038,\n    \"samples_ns\": [ 27096731952, 27133018684, 27139920373 ],\n    \"samples_ts\": [ 4.72382, 4.7175, 4.7163 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:27:14Z",
+          "avg_ns": 8258568855,
+          "stddev_ns": 11222604,
+          "avg_ts": 15.499073,
+          "stddev_ts": 0.021045,
+          "samples_ns": [
+            8271519251,
+            8252496493,
+            8251690821
+          ],
+          "samples_ts": [
+            15.4748,
+            15.5105,
+            15.512
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:27:47Z",
+          "avg_ns": 27123223669,
+          "stddev_ns": 23201743,
+          "avg_ts": 4.719205,
+          "stddev_ts": 0.004038,
+          "samples_ns": [
+            27096731952,
+            27133018684,
+            27139920373
+          ],
+          "samples_ts": [
+            4.72382,
+            4.7175,
+            4.7163
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 952
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:35:18.485155+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:29:11Z\",\n    \"avg_ns\": 8250958731,\n    \"stddev_ns\": 4077704,\n    \"avg_ts\": 15.513351,\n    \"stddev_ts\": 0.007663,\n    \"samples_ns\": [ 8250972451, 8246876209, 8255027535 ],\n    \"samples_ts\": [ 15.5133, 15.521, 15.5057 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:29:44Z\",\n    \"avg_ns\": 111088485966,\n    \"stddev_ns\": 3124091110,\n    \"avg_ts\": 4.609072,\n    \"stddev_ts\": 0.030448,\n    \"samples_ns\": [ 110295850177, 111740478233, 111229129490 ],\n    \"samples_ts\": [ 4.64206, 4.58205, 4.60311 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:29:11Z",
+          "avg_ns": 8250958731,
+          "stddev_ns": 4077704,
+          "avg_ts": 15.513351,
+          "stddev_ts": 0.007663,
+          "samples_ns": [
+            8250972451,
+            8246876209,
+            8255027535
+          ],
+          "samples_ts": [
+            15.5133,
+            15.521,
+            15.5057
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:29:44Z",
+          "avg_ns": 111088485966,
+          "stddev_ns": 3124091110,
+          "avg_ts": 4.609072,
+          "stddev_ts": 0.030448,
+          "samples_ns": [
+            110295850177,
+            111740478233,
+            111229129490
+          ],
+          "samples_ts": [
+            4.64206,
+            4.58205,
+            4.60311
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 953
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:38:56.171377+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:35:20Z\",\n    \"avg_ns\": 33325906575,\n    \"stddev_ns\": 15747026,\n    \"avg_ts\": 15.363425,\n    \"stddev_ts\": 0.007261,\n    \"samples_ns\": [ 33331505662, 33338088904, 33308125159 ],\n    \"samples_ts\": [ 15.3608, 15.3578, 15.3716 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:37:33Z\",\n    \"avg_ns\": 27297504580,\n    \"stddev_ns\": 264831318,\n    \"avg_ts\": 4.689369,\n    \"stddev_ts\": 0.045740,\n    \"samples_ns\": [ 26993237812, 27476130850, 27423145080 ],\n    \"samples_ts\": [ 4.74193, 4.65859, 4.66759 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:35:20Z",
+          "avg_ns": 33325906575,
+          "stddev_ns": 15747026,
+          "avg_ts": 15.363425,
+          "stddev_ts": 0.007261,
+          "samples_ns": [
+            33331505662,
+            33338088904,
+            33308125159
+          ],
+          "samples_ts": [
+            15.3608,
+            15.3578,
+            15.3716
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:37:33Z",
+          "avg_ns": 27297504580,
+          "stddev_ns": 264831318,
+          "avg_ts": 4.689369,
+          "stddev_ts": 0.04574,
+          "samples_ns": [
+            26993237812,
+            27476130850,
+            27423145080
+          ],
+          "samples_ts": [
+            4.74193,
+            4.65859,
+            4.66759
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 954
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:46:40.112479+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:38:58Z\",\n    \"avg_ns\": 33324557638,\n    \"stddev_ns\": 5697506,\n    \"avg_ts\": 15.364045,\n    \"stddev_ts\": 0.002627,\n    \"samples_ns\": [ 33322122584, 33331068034, 33320482296 ],\n    \"samples_ts\": [ 15.3652, 15.361, 15.3659 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:41:11Z\",\n    \"avg_ns\": 109366809824,\n    \"stddev_ns\": 16139050,\n    \"avg_ts\": 4.681493,\n    \"stddev_ts\": 0.000690,\n    \"samples_ns\": [ 109385437692, 109357449597, 109357542185 ],\n    \"samples_ts\": [ 4.6807, 4.68189, 4.68189 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:38:58Z",
+          "avg_ns": 33324557638,
+          "stddev_ns": 5697506,
+          "avg_ts": 15.364045,
+          "stddev_ts": 0.002627,
+          "samples_ns": [
+            33322122584,
+            33331068034,
+            33320482296
+          ],
+          "samples_ts": [
+            15.3652,
+            15.361,
+            15.3659
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:41:11Z",
+          "avg_ns": 109366809824,
+          "stddev_ns": 16139050,
+          "avg_ts": 4.681493,
+          "stddev_ts": 0.00069,
+          "samples_ns": [
+            109385437692,
+            109357449597,
+            109357542185
+          ],
+          "samples_ts": [
+            4.6807,
+            4.68189,
+            4.68189
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 955
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:48:37.040932+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:46:42Z\",\n    \"avg_ns\": 8253887452,\n    \"stddev_ns\": 15038018,\n    \"avg_ts\": 15.507878,\n    \"stddev_ts\": 0.028224,\n    \"samples_ns\": [ 8245206184, 8245204630, 8271251543 ],\n    \"samples_ts\": [ 15.5242, 15.5242, 15.4753 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:47:15Z\",\n    \"avg_ns\": 27152971143,\n    \"stddev_ns\": 2571690,\n    \"avg_ts\": 4.714033,\n    \"stddev_ts\": 0.000446,\n    \"samples_ns\": [ 27150314698, 27155436869, 27153161863 ],\n    \"samples_ts\": [ 4.71449, 4.7136, 4.714 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:46:42Z",
+          "avg_ns": 8253887452,
+          "stddev_ns": 15038018,
+          "avg_ts": 15.507878,
+          "stddev_ts": 0.028224,
+          "samples_ns": [
+            8245206184,
+            8245204630,
+            8271251543
+          ],
+          "samples_ts": [
+            15.5242,
+            15.5242,
+            15.4753
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:47:15Z",
+          "avg_ns": 27152971143,
+          "stddev_ns": 2571690,
+          "avg_ts": 4.714033,
+          "stddev_ts": 0.000446,
+          "samples_ns": [
+            27150314698,
+            27155436869,
+            27153161863
+          ],
+          "samples_ts": [
+            4.71449,
+            4.7136,
+            4.714
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 956
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:54:45.201835+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:48:39Z\",\n    \"avg_ns\": 8254288207,\n    \"stddev_ns\": 9150639,\n    \"avg_ts\": 15.507104,\n    \"stddev_ts\": 0.017195,\n    \"samples_ns\": [ 8262728202, 8255573577, 8244562842 ],\n    \"samples_ts\": [ 15.4913, 15.5047, 15.5254 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:49:12Z\",\n    \"avg_ns\": 110885390573,\n    \"stddev_ns\": 922714982,\n    \"avg_ts\": 4.617594,\n    \"stddev_ts\": 0.038494,\n    \"samples_ns\": [ 109902815283, 111019870055, 111733486383 ],\n    \"samples_ts\": [ 4.65866, 4.61179, 4.58233 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:48:39Z",
+          "avg_ns": 8254288207,
+          "stddev_ns": 9150639,
+          "avg_ts": 15.507104,
+          "stddev_ts": 0.017195,
+          "samples_ns": [
+            8262728202,
+            8255573577,
+            8244562842
+          ],
+          "samples_ts": [
+            15.4913,
+            15.5047,
+            15.5254
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:49:12Z",
+          "avg_ns": 110885390573,
+          "stddev_ns": 922714982,
+          "avg_ts": 4.617594,
+          "stddev_ts": 0.038494,
+          "samples_ns": [
+            109902815283,
+            111019870055,
+            111733486383
+          ],
+          "samples_ts": [
+            4.65866,
+            4.61179,
+            4.58233
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 957
+    },
+    {
+      "timestamp_utc": "2025-12-10T02:58:25.734077+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:54:47Z\",\n    \"avg_ns\": 33817783675,\n    \"stddev_ns\": 6906955,\n    \"avg_ts\": 15.139964,\n    \"stddev_ts\": 0.003090,\n    \"samples_ns\": [ 33810035858, 33823275266, 33820039903 ],\n    \"samples_ts\": [ 15.1434, 15.1375, 15.139 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:57:02Z\",\n    \"avg_ns\": 27573648448,\n    \"stddev_ns\": 548778644,\n    \"avg_ts\": 4.643340,\n    \"stddev_ts\": 0.092454,\n    \"samples_ns\": [ 27022654975, 27578105356, 28120185015 ],\n    \"samples_ts\": [ 4.73677, 4.64136, 4.55189 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:54:47Z",
+          "avg_ns": 33817783675,
+          "stddev_ns": 6906955,
+          "avg_ts": 15.139964,
+          "stddev_ts": 0.00309,
+          "samples_ns": [
+            33810035858,
+            33823275266,
+            33820039903
+          ],
+          "samples_ts": [
+            15.1434,
+            15.1375,
+            15.139
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:57:02Z",
+          "avg_ns": 27573648448,
+          "stddev_ns": 548778644,
+          "avg_ts": 4.64334,
+          "stddev_ts": 0.092454,
+          "samples_ns": [
+            27022654975,
+            27578105356,
+            28120185015
+          ],
+          "samples_ts": [
+            4.73677,
+            4.64136,
+            4.55189
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 958
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:06:14.051802+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T02:58:27Z\",\n    \"avg_ns\": 33798805157,\n    \"stddev_ns\": 9398271,\n    \"avg_ts\": 15.148465,\n    \"stddev_ts\": 0.004211,\n    \"samples_ns\": [ 33809448695, 33791658551, 33795308226 ],\n    \"samples_ts\": [ 15.1437, 15.1517, 15.15 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:00:43Z\",\n    \"avg_ns\": 110184761522,\n    \"stddev_ns\": 353783885,\n    \"avg_ts\": 4.646772,\n    \"stddev_ts\": 0.014908,\n    \"samples_ns\": [ 110565004699, 110123962717, 109865317150 ],\n    \"samples_ts\": [ 4.63076, 4.64931, 4.66025 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T02:58:27Z",
+          "avg_ns": 33798805157,
+          "stddev_ns": 9398271,
+          "avg_ts": 15.148465,
+          "stddev_ts": 0.004211,
+          "samples_ns": [
+            33809448695,
+            33791658551,
+            33795308226
+          ],
+          "samples_ts": [
+            15.1437,
+            15.1517,
+            15.15
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:00:43Z",
+          "avg_ns": 110184761522,
+          "stddev_ns": 353783885,
+          "avg_ts": 4.646772,
+          "stddev_ts": 0.014908,
+          "samples_ns": [
+            110565004699,
+            110123962717,
+            109865317150
+          ],
+          "samples_ts": [
+            4.63076,
+            4.64931,
+            4.66025
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 959
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:08:10.770079+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:06:16Z\",\n    \"avg_ns\": 8246904466,\n    \"stddev_ns\": 2610606,\n    \"avg_ts\": 15.520976,\n    \"stddev_ts\": 0.004911,\n    \"samples_ns\": [ 8243937647, 8248841204, 8247934548 ],\n    \"samples_ts\": [ 15.5266, 15.5173, 15.519 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:06:49Z\",\n    \"avg_ns\": 27074202960,\n    \"stddev_ns\": 9650175,\n    \"avg_ts\": 4.727748,\n    \"stddev_ts\": 0.001685,\n    \"samples_ns\": [ 27082048244, 27077133385, 27063427251 ],\n    \"samples_ts\": [ 4.72638, 4.72724, 4.72963 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:06:16Z",
+          "avg_ns": 8246904466,
+          "stddev_ns": 2610606,
+          "avg_ts": 15.520976,
+          "stddev_ts": 0.004911,
+          "samples_ns": [
+            8243937647,
+            8248841204,
+            8247934548
+          ],
+          "samples_ts": [
+            15.5266,
+            15.5173,
+            15.519
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:06:49Z",
+          "avg_ns": 27074202960,
+          "stddev_ns": 9650175,
+          "avg_ts": 4.727748,
+          "stddev_ts": 0.001685,
+          "samples_ns": [
+            27082048244,
+            27077133385,
+            27063427251
+          ],
+          "samples_ts": [
+            4.72638,
+            4.72724,
+            4.72963
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 960
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:14:15.152619+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:08:12Z\",\n    \"avg_ns\": 8259733721,\n    \"stddev_ns\": 11805253,\n    \"avg_ts\": 15.496889,\n    \"stddev_ts\": 0.022149,\n    \"samples_ns\": [ 8271514958, 8247905292, 8259780914 ],\n    \"samples_ts\": [ 15.4748, 15.5191, 15.4968 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:08:45Z\",\n    \"avg_ns\": 109623126556,\n    \"stddev_ns\": 13864868,\n    \"avg_ts\": 4.670547,\n    \"stddev_ts\": 0.000590,\n    \"samples_ns\": [ 109633115490, 109607307062, 109628957118 ],\n    \"samples_ts\": [ 4.67012, 4.67122, 4.6703 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:08:12Z",
+          "avg_ns": 8259733721,
+          "stddev_ns": 11805253,
+          "avg_ts": 15.496889,
+          "stddev_ts": 0.022149,
+          "samples_ns": [
+            8271514958,
+            8247905292,
+            8259780914
+          ],
+          "samples_ts": [
+            15.4748,
+            15.5191,
+            15.4968
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:08:45Z",
+          "avg_ns": 109623126556,
+          "stddev_ns": 13864868,
+          "avg_ts": 4.670547,
+          "stddev_ts": 0.00059,
+          "samples_ns": [
+            109633115490,
+            109607307062,
+            109628957118
+          ],
+          "samples_ts": [
+            4.67012,
+            4.67122,
+            4.6703
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 961
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:17:51.692917+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:14:17Z\",\n    \"avg_ns\": 33152679145,\n    \"stddev_ns\": 1890094,\n    \"avg_ts\": 15.443699,\n    \"stddev_ts\": 0.000872,\n    \"samples_ns\": [ 33152905060, 33150703966, 33154428411 ],\n    \"samples_ts\": [ 15.4436, 15.4446, 15.4429 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:16:29Z\",\n    \"avg_ns\": 27131039250,\n    \"stddev_ns\": 20249406,\n    \"avg_ts\": 4.717845,\n    \"stddev_ts\": 0.003522,\n    \"samples_ns\": [ 27109133662, 27149073525, 27134910563 ],\n    \"samples_ts\": [ 4.72166, 4.71471, 4.71717 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:14:17Z",
+          "avg_ns": 33152679145,
+          "stddev_ns": 1890094,
+          "avg_ts": 15.443699,
+          "stddev_ts": 0.000872,
+          "samples_ns": [
+            33152905060,
+            33150703966,
+            33154428411
+          ],
+          "samples_ts": [
+            15.4436,
+            15.4446,
+            15.4429
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:16:29Z",
+          "avg_ns": 27131039250,
+          "stddev_ns": 20249406,
+          "avg_ts": 4.717845,
+          "stddev_ts": 0.003522,
+          "samples_ns": [
+            27109133662,
+            27149073525,
+            27134910563
+          ],
+          "samples_ts": [
+            4.72166,
+            4.71471,
+            4.71717
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 962
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:25:36.087557+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:17:53Z\",\n    \"avg_ns\": 33165607398,\n    \"stddev_ns\": 8075253,\n    \"avg_ts\": 15.437680,\n    \"stddev_ts\": 0.003759,\n    \"samples_ns\": [ 33165897206, 33173533846, 33157391142 ],\n    \"samples_ts\": [ 15.4375, 15.434, 15.4415 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:20:06Z\",\n    \"avg_ns\": 109732782900,\n    \"stddev_ns\": 180384677,\n    \"avg_ts\": 4.665888,\n    \"stddev_ts\": 0.007663,\n    \"samples_ns\": [ 109653747113, 109605407354, 109939194235 ],\n    \"samples_ts\": [ 4.66924, 4.6713, 4.65712 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:17:53Z",
+          "avg_ns": 33165607398,
+          "stddev_ns": 8075253,
+          "avg_ts": 15.43768,
+          "stddev_ts": 0.003759,
+          "samples_ns": [
+            33165897206,
+            33173533846,
+            33157391142
+          ],
+          "samples_ts": [
+            15.4375,
+            15.434,
+            15.4415
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:20:06Z",
+          "avg_ns": 109732782900,
+          "stddev_ns": 180384677,
+          "avg_ts": 4.665888,
+          "stddev_ts": 0.007663,
+          "samples_ns": [
+            109653747113,
+            109605407354,
+            109939194235
+          ],
+          "samples_ts": [
+            4.66924,
+            4.6713,
+            4.65712
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 963
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:27:32.897844+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:25:38Z\",\n    \"avg_ns\": 8246763011,\n    \"stddev_ns\": 8663848,\n    \"avg_ts\": 15.521253,\n    \"stddev_ts\": 0.016304,\n    \"samples_ns\": [ 8238432513, 8255725590, 8246130930 ],\n    \"samples_ts\": [ 15.5369, 15.5044, 15.5224 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:26:11Z\",\n    \"avg_ns\": 27116340734,\n    \"stddev_ns\": 10038590,\n    \"avg_ts\": 4.720401,\n    \"stddev_ts\": 0.001747,\n    \"samples_ns\": [ 27127777664, 27112239844, 27109004696 ],\n    \"samples_ts\": [ 4.71841, 4.72111, 4.72168 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:25:38Z",
+          "avg_ns": 8246763011,
+          "stddev_ns": 8663848,
+          "avg_ts": 15.521253,
+          "stddev_ts": 0.016304,
+          "samples_ns": [
+            8238432513,
+            8255725590,
+            8246130930
+          ],
+          "samples_ts": [
+            15.5369,
+            15.5044,
+            15.5224
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:26:11Z",
+          "avg_ns": 27116340734,
+          "stddev_ns": 10038590,
+          "avg_ts": 4.720401,
+          "stddev_ts": 0.001747,
+          "samples_ns": [
+            27127777664,
+            27112239844,
+            27109004696
+          ],
+          "samples_ts": [
+            4.71841,
+            4.72111,
+            4.72168
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 964
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:33:36.876239+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:27:35Z\",\n    \"avg_ns\": 8251336242,\n    \"stddev_ns\": 9262998,\n    \"avg_ts\": 15.512652,\n    \"stddev_ts\": 0.017403,\n    \"samples_ns\": [ 8261931274, 8244772289, 8247305164 ],\n    \"samples_ts\": [ 15.4927, 15.525, 15.5202 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:28:08Z\",\n    \"avg_ns\": 109486257436,\n    \"stddev_ns\": 355895634,\n    \"avg_ts\": 4.676419,\n    \"stddev_ts\": 0.015173,\n    \"samples_ns\": [ 109280711393, 109280850606, 109897210309 ],\n    \"samples_ts\": [ 4.68518, 4.68518, 4.6589 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:27:35Z",
+          "avg_ns": 8251336242,
+          "stddev_ns": 9262998,
+          "avg_ts": 15.512652,
+          "stddev_ts": 0.017403,
+          "samples_ns": [
+            8261931274,
+            8244772289,
+            8247305164
+          ],
+          "samples_ts": [
+            15.4927,
+            15.525,
+            15.5202
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:28:08Z",
+          "avg_ns": 109486257436,
+          "stddev_ns": 355895634,
+          "avg_ts": 4.676419,
+          "stddev_ts": 0.015173,
+          "samples_ns": [
+            109280711393,
+            109280850606,
+            109897210309
+          ],
+          "samples_ts": [
+            4.68518,
+            4.68518,
+            4.6589
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 965
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:37:13.706376+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:33:39Z\",\n    \"avg_ns\": 33322303268,\n    \"stddev_ns\": 13052999,\n    \"avg_ts\": 15.365086,\n    \"stddev_ts\": 0.006017,\n    \"samples_ns\": [ 33336334484, 33320047692, 33310527630 ],\n    \"samples_ts\": [ 15.3586, 15.3661, 15.3705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:35:52Z\",\n    \"avg_ns\": 27018966572,\n    \"stddev_ns\": 21006339,\n    \"avg_ts\": 4.737415,\n    \"stddev_ts\": 0.003682,\n    \"samples_ns\": [ 27043206307, 27007616584, 27006076825 ],\n    \"samples_ts\": [ 4.73317, 4.7394, 4.73967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:33:39Z",
+          "avg_ns": 33322303268,
+          "stddev_ns": 13052999,
+          "avg_ts": 15.365086,
+          "stddev_ts": 0.006017,
+          "samples_ns": [
+            33336334484,
+            33320047692,
+            33310527630
+          ],
+          "samples_ts": [
+            15.3586,
+            15.3661,
+            15.3705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:35:52Z",
+          "avg_ns": 27018966572,
+          "stddev_ns": 21006339,
+          "avg_ts": 4.737415,
+          "stddev_ts": 0.003682,
+          "samples_ns": [
+            27043206307,
+            27007616584,
+            27006076825
+          ],
+          "samples_ts": [
+            4.73317,
+            4.7394,
+            4.73967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 966
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:44:57.686790+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:37:15Z\",\n    \"avg_ns\": 33303312454,\n    \"stddev_ns\": 10740848,\n    \"avg_ts\": 15.373847,\n    \"stddev_ts\": 0.004959,\n    \"samples_ns\": [ 33313088328, 33291814673, 33305034361 ],\n    \"samples_ts\": [ 15.3693, 15.3792, 15.3731 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:39:29Z\",\n    \"avg_ns\": 109425826375,\n    \"stddev_ns\": 36831105,\n    \"avg_ts\": 4.678969,\n    \"stddev_ts\": 0.001575,\n    \"samples_ns\": [ 109465158921, 109420164890, 109392155315 ],\n    \"samples_ts\": [ 4.67729, 4.67921, 4.68041 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:37:15Z",
+          "avg_ns": 33303312454,
+          "stddev_ns": 10740848,
+          "avg_ts": 15.373847,
+          "stddev_ts": 0.004959,
+          "samples_ns": [
+            33313088328,
+            33291814673,
+            33305034361
+          ],
+          "samples_ts": [
+            15.3693,
+            15.3792,
+            15.3731
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:39:29Z",
+          "avg_ns": 109425826375,
+          "stddev_ns": 36831105,
+          "avg_ts": 4.678969,
+          "stddev_ts": 0.001575,
+          "samples_ns": [
+            109465158921,
+            109420164890,
+            109392155315
+          ],
+          "samples_ts": [
+            4.67729,
+            4.67921,
+            4.68041
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 967
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:46:54.386770+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:44:59Z\",\n    \"avg_ns\": 8251708844,\n    \"stddev_ns\": 2790907,\n    \"avg_ts\": 15.511940,\n    \"stddev_ts\": 0.005240,\n    \"samples_ns\": [ 8254898880, 8250488549, 8249739105 ],\n    \"samples_ts\": [ 15.5059, 15.5142, 15.5156 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:45:32Z\",\n    \"avg_ns\": 27072106627,\n    \"stddev_ns\": 10293841,\n    \"avg_ts\": 4.728114,\n    \"stddev_ts\": 0.001798,\n    \"samples_ns\": [ 27082672988, 27062108876, 27071538017 ],\n    \"samples_ts\": [ 4.72627, 4.72986, 4.72821 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:44:59Z",
+          "avg_ns": 8251708844,
+          "stddev_ns": 2790907,
+          "avg_ts": 15.51194,
+          "stddev_ts": 0.00524,
+          "samples_ns": [
+            8254898880,
+            8250488549,
+            8249739105
+          ],
+          "samples_ts": [
+            15.5059,
+            15.5142,
+            15.5156
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:45:32Z",
+          "avg_ns": 27072106627,
+          "stddev_ns": 10293841,
+          "avg_ts": 4.728114,
+          "stddev_ts": 0.001798,
+          "samples_ns": [
+            27082672988,
+            27062108876,
+            27071538017
+          ],
+          "samples_ts": [
+            4.72627,
+            4.72986,
+            4.72821
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 968
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:52:59.131819+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:46:56Z\",\n    \"avg_ns\": 8257481560,\n    \"stddev_ns\": 20339401,\n    \"avg_ts\": 15.501157,\n    \"stddev_ts\": 0.038131,\n    \"samples_ns\": [ 8243350739, 8280792676, 8248301266 ],\n    \"samples_ts\": [ 15.5277, 15.4575, 15.5183 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:47:29Z\",\n    \"avg_ns\": 109747240667,\n    \"stddev_ns\": 794939692,\n    \"avg_ts\": 4.665428,\n    \"stddev_ts\": 0.033672,\n    \"samples_ns\": [ 109155485055, 109435419645, 110650817303 ],\n    \"samples_ts\": [ 4.69056, 4.67856, 4.62717 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:46:56Z",
+          "avg_ns": 8257481560,
+          "stddev_ns": 20339401,
+          "avg_ts": 15.501157,
+          "stddev_ts": 0.038131,
+          "samples_ns": [
+            8243350739,
+            8280792676,
+            8248301266
+          ],
+          "samples_ts": [
+            15.5277,
+            15.4575,
+            15.5183
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:47:29Z",
+          "avg_ns": 109747240667,
+          "stddev_ns": 794939692,
+          "avg_ts": 4.665428,
+          "stddev_ts": 0.033672,
+          "samples_ns": [
+            109155485055,
+            109435419645,
+            110650817303
+          ],
+          "samples_ts": [
+            4.69056,
+            4.67856,
+            4.62717
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 969
+    },
+    {
+      "timestamp_utc": "2025-12-10T03:56:39.820817+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:53:01Z\",\n    \"avg_ns\": 33803311970,\n    \"stddev_ns\": 5650980,\n    \"avg_ts\": 15.146445,\n    \"stddev_ts\": 0.002532,\n    \"samples_ns\": [ 33809680620, 33801358011, 33798897279 ],\n    \"samples_ts\": [ 15.1436, 15.1473, 15.1484 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:55:16Z\",\n    \"avg_ns\": 27659878454,\n    \"stddev_ns\": 424618376,\n    \"avg_ts\": 4.628375,\n    \"stddev_ts\": 0.071686,\n    \"samples_ns\": [ 27169719047, 27894540543, 27915375774 ],\n    \"samples_ts\": [ 4.71113, 4.58871, 4.58529 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:53:01Z",
+          "avg_ns": 33803311970,
+          "stddev_ns": 5650980,
+          "avg_ts": 15.146445,
+          "stddev_ts": 0.002532,
+          "samples_ns": [
+            33809680620,
+            33801358011,
+            33798897279
+          ],
+          "samples_ts": [
+            15.1436,
+            15.1473,
+            15.1484
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:55:16Z",
+          "avg_ns": 27659878454,
+          "stddev_ns": 424618376,
+          "avg_ts": 4.628375,
+          "stddev_ts": 0.071686,
+          "samples_ns": [
+            27169719047,
+            27894540543,
+            27915375774
+          ],
+          "samples_ts": [
+            4.71113,
+            4.58871,
+            4.58529
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 970
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:04:31.415916+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:56:41Z\",\n    \"avg_ns\": 33807236291,\n    \"stddev_ns\": 2592138,\n    \"avg_ts\": 15.144687,\n    \"stddev_ts\": 0.001161,\n    \"samples_ns\": [ 33804585340, 33807358220, 33809765313 ],\n    \"samples_ts\": [ 15.1459, 15.1446, 15.1436 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T03:58:57Z\",\n    \"avg_ns\": 111284663287,\n    \"stddev_ns\": 1103238067,\n    \"avg_ts\": 4.601114,\n    \"stddev_ts\": 0.045428,\n    \"samples_ns\": [ 110389677293, 112517257734, 110947054834 ],\n    \"samples_ts\": [ 4.63811, 4.55041, 4.61481 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:56:41Z",
+          "avg_ns": 33807236291,
+          "stddev_ns": 2592138,
+          "avg_ts": 15.144687,
+          "stddev_ts": 0.001161,
+          "samples_ns": [
+            33804585340,
+            33807358220,
+            33809765313
+          ],
+          "samples_ts": [
+            15.1459,
+            15.1446,
+            15.1436
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T03:58:57Z",
+          "avg_ns": 111284663287,
+          "stddev_ns": 1103238067,
+          "avg_ts": 4.601114,
+          "stddev_ts": 0.045428,
+          "samples_ns": [
+            110389677293,
+            112517257734,
+            110947054834
+          ],
+          "samples_ts": [
+            4.63811,
+            4.55041,
+            4.61481
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 971
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:06:28.464178+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:04:33Z\",\n    \"avg_ns\": 6469432855,\n    \"stddev_ns\": 19761769,\n    \"avg_ts\": 19.785474,\n    \"stddev_ts\": 0.060409,\n    \"samples_ns\": [ 6490117354, 6450746252, 6467434961 ],\n    \"samples_ts\": [ 19.7223, 19.8427, 19.7915 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:04:59Z\",\n    \"avg_ns\": 29569858560,\n    \"stddev_ns\": 100339267,\n    \"avg_ts\": 4.328765,\n    \"stddev_ts\": 0.014700,\n    \"samples_ns\": [ 29462625987, 29585477209, 29661472485 ],\n    \"samples_ts\": [ 4.34449, 4.32645, 4.31536 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:04:33Z",
+          "avg_ns": 6469432855,
+          "stddev_ns": 19761769,
+          "avg_ts": 19.785474,
+          "stddev_ts": 0.060409,
+          "samples_ns": [
+            6490117354,
+            6450746252,
+            6467434961
+          ],
+          "samples_ts": [
+            19.7223,
+            19.8427,
+            19.7915
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:04:59Z",
+          "avg_ns": 29569858560,
+          "stddev_ns": 100339267,
+          "avg_ts": 4.328765,
+          "stddev_ts": 0.0147,
+          "samples_ns": [
+            29462625987,
+            29585477209,
+            29661472485
+          ],
+          "samples_ts": [
+            4.34449,
+            4.32645,
+            4.31536
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 972
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:12:55.210792+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:06:30Z\",\n    \"avg_ns\": 6470667386,\n    \"stddev_ns\": 22823280,\n    \"avg_ts\": 19.781740,\n    \"stddev_ts\": 0.069633,\n    \"samples_ns\": [ 6456546096, 6458457743, 6496998319 ],\n    \"samples_ts\": [ 19.8248, 19.819, 19.7014 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:06:56Z\",\n    \"avg_ns\": 119451362066,\n    \"stddev_ns\": 211694485,\n    \"avg_ts\": 4.286272,\n    \"stddev_ts\": 0.007604,\n    \"samples_ns\": [ 119563977244, 119207164397, 119582944559 ],\n    \"samples_ts\": [ 4.28223, 4.29504, 4.28155 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:06:30Z",
+          "avg_ns": 6470667386,
+          "stddev_ns": 22823280,
+          "avg_ts": 19.78174,
+          "stddev_ts": 0.069633,
+          "samples_ns": [
+            6456546096,
+            6458457743,
+            6496998319
+          ],
+          "samples_ts": [
+            19.8248,
+            19.819,
+            19.7014
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:06:56Z",
+          "avg_ns": 119451362066,
+          "stddev_ns": 211694485,
+          "avg_ts": 4.286272,
+          "stddev_ts": 0.007604,
+          "samples_ns": [
+            119563977244,
+            119207164397,
+            119582944559
+          ],
+          "samples_ts": [
+            4.28223,
+            4.29504,
+            4.28155
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 973
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:16:10.233788+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:12:57Z\",\n    \"avg_ns\": 25929759129,\n    \"stddev_ns\": 39647502,\n    \"avg_ts\": 19.745683,\n    \"stddev_ts\": 0.030202,\n    \"samples_ns\": [ 25966195843, 25935544914, 25887536630 ],\n    \"samples_ts\": [ 19.7179, 19.7412, 19.7779 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:14:41Z\",\n    \"avg_ns\": 29606901453,\n    \"stddev_ns\": 29295053,\n    \"avg_ts\": 4.323319,\n    \"stddev_ts\": 0.004277,\n    \"samples_ns\": [ 29605160040, 29637027358, 29578516963 ],\n    \"samples_ts\": [ 4.32357, 4.31892, 4.32747 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:12:57Z",
+          "avg_ns": 25929759129,
+          "stddev_ns": 39647502,
+          "avg_ts": 19.745683,
+          "stddev_ts": 0.030202,
+          "samples_ns": [
+            25966195843,
+            25935544914,
+            25887536630
+          ],
+          "samples_ts": [
+            19.7179,
+            19.7412,
+            19.7779
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:14:41Z",
+          "avg_ns": 29606901453,
+          "stddev_ns": 29295053,
+          "avg_ts": 4.323319,
+          "stddev_ts": 0.004277,
+          "samples_ns": [
+            29605160040,
+            29637027358,
+            29578516963
+          ],
+          "samples_ts": [
+            4.32357,
+            4.31892,
+            4.32747
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 974
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:23:55.675290+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:16:12Z\",\n    \"avg_ns\": 25932183472,\n    \"stddev_ns\": 3331027,\n    \"avg_ts\": 19.743806,\n    \"stddev_ts\": 0.002530,\n    \"samples_ns\": [ 25936020555, 25930226433, 25930303430 ],\n    \"samples_ts\": [ 19.7409, 19.7453, 19.7452 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:17:56Z\",\n    \"avg_ns\": 119719980415,\n    \"stddev_ns\": 230534733,\n    \"avg_ts\": 4.276657,\n    \"stddev_ts\": 0.008226,\n    \"samples_ns\": [ 119589728463, 119584054544, 119986158240 ],\n    \"samples_ts\": [ 4.2813, 4.28151, 4.26716 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:16:12Z",
+          "avg_ns": 25932183472,
+          "stddev_ns": 3331027,
+          "avg_ts": 19.743806,
+          "stddev_ts": 0.00253,
+          "samples_ns": [
+            25936020555,
+            25930226433,
+            25930303430
+          ],
+          "samples_ts": [
+            19.7409,
+            19.7453,
+            19.7452
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:17:56Z",
+          "avg_ns": 119719980415,
+          "stddev_ns": 230534733,
+          "avg_ts": 4.276657,
+          "stddev_ts": 0.008226,
+          "samples_ns": [
+            119589728463,
+            119584054544,
+            119986158240
+          ],
+          "samples_ts": [
+            4.2813,
+            4.28151,
+            4.26716
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 975
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:25:52.803580+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:23:57Z\",\n    \"avg_ns\": 6452937847,\n    \"stddev_ns\": 2437295,\n    \"avg_ts\": 19.835928,\n    \"stddev_ts\": 0.007483,\n    \"samples_ns\": [ 6452034431, 6455695067, 6451084045 ],\n    \"samples_ts\": [ 19.8387, 19.8275, 19.8416 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:24:23Z\",\n    \"avg_ns\": 29595549186,\n    \"stddev_ns\": 83637761,\n    \"avg_ts\": 4.324998,\n    \"stddev_ts\": 0.012232,\n    \"samples_ns\": [ 29670404110, 29610968402, 29505275048 ],\n    \"samples_ts\": [ 4.31406, 4.32272, 4.33821 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:23:57Z",
+          "avg_ns": 6452937847,
+          "stddev_ns": 2437295,
+          "avg_ts": 19.835928,
+          "stddev_ts": 0.007483,
+          "samples_ns": [
+            6452034431,
+            6455695067,
+            6451084045
+          ],
+          "samples_ts": [
+            19.8387,
+            19.8275,
+            19.8416
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:24:23Z",
+          "avg_ns": 29595549186,
+          "stddev_ns": 83637761,
+          "avg_ts": 4.324998,
+          "stddev_ts": 0.012232,
+          "samples_ns": [
+            29670404110,
+            29610968402,
+            29505275048
+          ],
+          "samples_ts": [
+            4.31406,
+            4.32272,
+            4.33821
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 976
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:32:20.082875+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:25:54Z\",\n    \"avg_ns\": 6465186223,\n    \"stddev_ns\": 18362825,\n    \"avg_ts\": 19.798453,\n    \"stddev_ts\": 0.056223,\n    \"samples_ns\": [ 6483881621, 6464501204, 6447175846 ],\n    \"samples_ts\": [ 19.7413, 19.8004, 19.8537 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:26:20Z\",\n    \"avg_ns\": 119645130163,\n    \"stddev_ns\": 156079853,\n    \"avg_ts\": 4.279327,\n    \"stddev_ts\": 0.005578,\n    \"samples_ns\": [ 119825121968, 119563079912, 119547188609 ],\n    \"samples_ts\": [ 4.27289, 4.28226, 4.28283 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:25:54Z",
+          "avg_ns": 6465186223,
+          "stddev_ns": 18362825,
+          "avg_ts": 19.798453,
+          "stddev_ts": 0.056223,
+          "samples_ns": [
+            6483881621,
+            6464501204,
+            6447175846
+          ],
+          "samples_ts": [
+            19.7413,
+            19.8004,
+            19.8537
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:26:20Z",
+          "avg_ns": 119645130163,
+          "stddev_ns": 156079853,
+          "avg_ts": 4.279327,
+          "stddev_ts": 0.005578,
+          "samples_ns": [
+            119825121968,
+            119563079912,
+            119547188609
+          ],
+          "samples_ts": [
+            4.27289,
+            4.28226,
+            4.28283
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 977
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:35:35.606045+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:32:22Z\",\n    \"avg_ns\": 26087362106,\n    \"stddev_ns\": 13930638,\n    \"avg_ts\": 19.626365,\n    \"stddev_ts\": 0.010477,\n    \"samples_ns\": [ 26076929791, 26081975826, 26103180702 ],\n    \"samples_ts\": [ 19.6342, 19.6304, 19.6145 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:34:06Z\",\n    \"avg_ns\": 29551221569,\n    \"stddev_ns\": 37152501,\n    \"avg_ts\": 4.331467,\n    \"stddev_ts\": 0.005449,\n    \"samples_ns\": [ 29509135496, 29579463292, 29565065921 ],\n    \"samples_ts\": [ 4.33764, 4.32733, 4.32943 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:32:22Z",
+          "avg_ns": 26087362106,
+          "stddev_ns": 13930638,
+          "avg_ts": 19.626365,
+          "stddev_ts": 0.010477,
+          "samples_ns": [
+            26076929791,
+            26081975826,
+            26103180702
+          ],
+          "samples_ts": [
+            19.6342,
+            19.6304,
+            19.6145
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:34:06Z",
+          "avg_ns": 29551221569,
+          "stddev_ns": 37152501,
+          "avg_ts": 4.331467,
+          "stddev_ts": 0.005449,
+          "samples_ns": [
+            29509135496,
+            29579463292,
+            29565065921
+          ],
+          "samples_ts": [
+            4.33764,
+            4.32733,
+            4.32943
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 978
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:43:21.366864+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:35:37Z\",\n    \"avg_ns\": 26095257920,\n    \"stddev_ns\": 19337584,\n    \"avg_ts\": 19.620430,\n    \"stddev_ts\": 0.014539,\n    \"samples_ns\": [ 26114780639, 26076112336, 26094880786 ],\n    \"samples_ts\": [ 19.6058, 19.6348, 19.6207 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:37:22Z\",\n    \"avg_ns\": 119611135438,\n    \"stddev_ns\": 102123457,\n    \"avg_ts\": 4.280540,\n    \"stddev_ts\": 0.003653,\n    \"samples_ns\": [ 119725779893, 119577720692, 119529905730 ],\n    \"samples_ts\": [ 4.27644, 4.28173, 4.28345 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:35:37Z",
+          "avg_ns": 26095257920,
+          "stddev_ns": 19337584,
+          "avg_ts": 19.62043,
+          "stddev_ts": 0.014539,
+          "samples_ns": [
+            26114780639,
+            26076112336,
+            26094880786
+          ],
+          "samples_ts": [
+            19.6058,
+            19.6348,
+            19.6207
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:37:22Z",
+          "avg_ns": 119611135438,
+          "stddev_ns": 102123457,
+          "avg_ts": 4.28054,
+          "stddev_ts": 0.003653,
+          "samples_ns": [
+            119725779893,
+            119577720692,
+            119529905730
+          ],
+          "samples_ts": [
+            4.27644,
+            4.28173,
+            4.28345
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 979
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:45:18.303200+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:43:23Z\",\n    \"avg_ns\": 6456026354,\n    \"stddev_ns\": 11385010,\n    \"avg_ts\": 19.826478,\n    \"stddev_ts\": 0.034961,\n    \"samples_ns\": [ 6467451989, 6455943520, 6444683555 ],\n    \"samples_ts\": [ 19.7914, 19.8267, 19.8613 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:43:49Z\",\n    \"avg_ns\": 29545970203,\n    \"stddev_ns\": 33389359,\n    \"avg_ts\": 4.332236,\n    \"stddev_ts\": 0.004894,\n    \"samples_ns\": [ 29582709506, 29517478400, 29537722705 ],\n    \"samples_ts\": [ 4.32685, 4.33641, 4.33344 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:43:23Z",
+          "avg_ns": 6456026354,
+          "stddev_ns": 11385010,
+          "avg_ts": 19.826478,
+          "stddev_ts": 0.034961,
+          "samples_ns": [
+            6467451989,
+            6455943520,
+            6444683555
+          ],
+          "samples_ts": [
+            19.7914,
+            19.8267,
+            19.8613
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:43:49Z",
+          "avg_ns": 29545970203,
+          "stddev_ns": 33389359,
+          "avg_ts": 4.332236,
+          "stddev_ts": 0.004894,
+          "samples_ns": [
+            29582709506,
+            29517478400,
+            29537722705
+          ],
+          "samples_ts": [
+            4.32685,
+            4.33641,
+            4.33344
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 980
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:51:45.724424+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:45:20Z\",\n    \"avg_ns\": 6456802416,\n    \"stddev_ns\": 22865134,\n    \"avg_ts\": 19.824220,\n    \"stddev_ts\": 0.070091,\n    \"samples_ns\": [ 6449078497, 6438799708, 6482529044 ],\n    \"samples_ts\": [ 19.8478, 19.8795, 19.7454 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:45:46Z\",\n    \"avg_ns\": 119683230976,\n    \"stddev_ns\": 91006109,\n    \"avg_ts\": 4.277961,\n    \"stddev_ts\": 0.003252,\n    \"samples_ns\": [ 119606495645, 119783774832, 119659422451 ],\n    \"samples_ts\": [ 4.2807, 4.27437, 4.27881 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:45:20Z",
+          "avg_ns": 6456802416,
+          "stddev_ns": 22865134,
+          "avg_ts": 19.82422,
+          "stddev_ts": 0.070091,
+          "samples_ns": [
+            6449078497,
+            6438799708,
+            6482529044
+          ],
+          "samples_ts": [
+            19.8478,
+            19.8795,
+            19.7454
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:45:46Z",
+          "avg_ns": 119683230976,
+          "stddev_ns": 91006109,
+          "avg_ts": 4.277961,
+          "stddev_ts": 0.003252,
+          "samples_ns": [
+            119606495645,
+            119783774832,
+            119659422451
+          ],
+          "samples_ts": [
+            4.2807,
+            4.27437,
+            4.27881
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 981
+    },
+    {
+      "timestamp_utc": "2025-12-10T04:55:02.932419+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:51:47Z\",\n    \"avg_ns\": 26478247927,\n    \"stddev_ns\": 11168512,\n    \"avg_ts\": 19.336629,\n    \"stddev_ts\": 0.008157,\n    \"samples_ns\": [ 26483300625, 26465447248, 26485995909 ],\n    \"samples_ts\": [ 19.3329, 19.346, 19.331 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:53:33Z\",\n    \"avg_ns\": 29596097611,\n    \"stddev_ns\": 39151305,\n    \"avg_ts\": 4.324900,\n    \"stddev_ts\": 0.005725,\n    \"samples_ns\": [ 29614572258, 29622592362, 29551128215 ],\n    \"samples_ts\": [ 4.3222, 4.32103, 4.33148 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:51:47Z",
+          "avg_ns": 26478247927,
+          "stddev_ns": 11168512,
+          "avg_ts": 19.336629,
+          "stddev_ts": 0.008157,
+          "samples_ns": [
+            26483300625,
+            26465447248,
+            26485995909
+          ],
+          "samples_ts": [
+            19.3329,
+            19.346,
+            19.331
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:53:33Z",
+          "avg_ns": 29596097611,
+          "stddev_ns": 39151305,
+          "avg_ts": 4.3249,
+          "stddev_ts": 0.005725,
+          "samples_ns": [
+            29614572258,
+            29622592362,
+            29551128215
+          ],
+          "samples_ts": [
+            4.3222,
+            4.32103,
+            4.33148
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 982
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:02:50.597263+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:55:05Z\",\n    \"avg_ns\": 26477762503,\n    \"stddev_ns\": 9769962,\n    \"avg_ts\": 19.336983,\n    \"stddev_ts\": 0.007134,\n    \"samples_ns\": [ 26488838827, 26474078646, 26470370036 ],\n    \"samples_ts\": [ 19.3289, 19.3397, 19.3424 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T04:56:51Z\",\n    \"avg_ns\": 119746387476,\n    \"stddev_ns\": 97153107,\n    \"avg_ts\": 4.275705,\n    \"stddev_ts\": 0.003471,\n    \"samples_ns\": [ 119634204742, 119802435725, 119802521961 ],\n    \"samples_ts\": [ 4.27971, 4.2737, 4.2737 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:55:05Z",
+          "avg_ns": 26477762503,
+          "stddev_ns": 9769962,
+          "avg_ts": 19.336983,
+          "stddev_ts": 0.007134,
+          "samples_ns": [
+            26488838827,
+            26474078646,
+            26470370036
+          ],
+          "samples_ts": [
+            19.3289,
+            19.3397,
+            19.3424
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T04:56:51Z",
+          "avg_ns": 119746387476,
+          "stddev_ns": 97153107,
+          "avg_ts": 4.275705,
+          "stddev_ts": 0.003471,
+          "samples_ns": [
+            119634204742,
+            119802435725,
+            119802521961
+          ],
+          "samples_ts": [
+            4.27971,
+            4.2737,
+            4.2737
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 983
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:04:47.994900+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:02:52Z\",\n    \"avg_ns\": 6458054530,\n    \"stddev_ns\": 7759440,\n    \"avg_ts\": 19.820230,\n    \"stddev_ts\": 0.023798,\n    \"samples_ns\": [ 6453374919, 6467011348, 6453777323 ],\n    \"samples_ts\": [ 19.8346, 19.7928, 19.8333 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:03:18Z\",\n    \"avg_ns\": 29695907932,\n    \"stddev_ns\": 76054982,\n    \"avg_ts\": 4.310377,\n    \"stddev_ts\": 0.011055,\n    \"samples_ns\": [ 29747824332, 29731292017, 29608607447 ],\n    \"samples_ts\": [ 4.30284, 4.30523, 4.32307 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:02:52Z",
+          "avg_ns": 6458054530,
+          "stddev_ns": 7759440,
+          "avg_ts": 19.82023,
+          "stddev_ts": 0.023798,
+          "samples_ns": [
+            6453374919,
+            6467011348,
+            6453777323
+          ],
+          "samples_ts": [
+            19.8346,
+            19.7928,
+            19.8333
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:03:18Z",
+          "avg_ns": 29695907932,
+          "stddev_ns": 76054982,
+          "avg_ts": 4.310377,
+          "stddev_ts": 0.011055,
+          "samples_ns": [
+            29747824332,
+            29731292017,
+            29608607447
+          ],
+          "samples_ts": [
+            4.30284,
+            4.30523,
+            4.32307
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 984
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:11:15.843169+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:04:50Z\",\n    \"avg_ns\": 6445160784,\n    \"stddev_ns\": 5489670,\n    \"avg_ts\": 19.859871,\n    \"stddev_ts\": 0.016908,\n    \"samples_ns\": [ 6451432831, 6441229426, 6442820095 ],\n    \"samples_ts\": [ 19.8406, 19.872, 19.8671 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:05:15Z\",\n    \"avg_ns\": 119830584863,\n    \"stddev_ns\": 120856508,\n    \"avg_ts\": 4.272702,\n    \"stddev_ts\": 0.004307,\n    \"samples_ns\": [ 119738720286, 119967495008, 119785539295 ],\n    \"samples_ts\": [ 4.27598, 4.26782, 4.27431 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:04:50Z",
+          "avg_ns": 6445160784,
+          "stddev_ns": 5489670,
+          "avg_ts": 19.859871,
+          "stddev_ts": 0.016908,
+          "samples_ns": [
+            6451432831,
+            6441229426,
+            6442820095
+          ],
+          "samples_ts": [
+            19.8406,
+            19.872,
+            19.8671
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:05:15Z",
+          "avg_ns": 119830584863,
+          "stddev_ns": 120856508,
+          "avg_ts": 4.272702,
+          "stddev_ts": 0.004307,
+          "samples_ns": [
+            119738720286,
+            119967495008,
+            119785539295
+          ],
+          "samples_ts": [
+            4.27598,
+            4.26782,
+            4.27431
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 985
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:14:31.077553+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:11:18Z\",\n    \"avg_ns\": 25948685408,\n    \"stddev_ns\": 28697814,\n    \"avg_ts\": 19.731266,\n    \"stddev_ts\": 0.021813,\n    \"samples_ns\": [ 25980456882, 25924645246, 25940954096 ],\n    \"samples_ts\": [ 19.7071, 19.7495, 19.7371 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:13:01Z\",\n    \"avg_ns\": 29614159816,\n    \"stddev_ns\": 39381101,\n    \"avg_ts\": 4.322262,\n    \"stddev_ts\": 0.005745,\n    \"samples_ns\": [ 29605436797, 29657170689, 29579871963 ],\n    \"samples_ts\": [ 4.32353, 4.31599, 4.32727 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:11:18Z",
+          "avg_ns": 25948685408,
+          "stddev_ns": 28697814,
+          "avg_ts": 19.731266,
+          "stddev_ts": 0.021813,
+          "samples_ns": [
+            25980456882,
+            25924645246,
+            25940954096
+          ],
+          "samples_ts": [
+            19.7071,
+            19.7495,
+            19.7371
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:13:01Z",
+          "avg_ns": 29614159816,
+          "stddev_ns": 39381101,
+          "avg_ts": 4.322262,
+          "stddev_ts": 0.005745,
+          "samples_ns": [
+            29605436797,
+            29657170689,
+            29579871963
+          ],
+          "samples_ts": [
+            4.32353,
+            4.31599,
+            4.32727
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 986
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:22:16.402647+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:14:33Z\",\n    \"avg_ns\": 25893229778,\n    \"stddev_ns\": 6854464,\n    \"avg_ts\": 19.773509,\n    \"stddev_ts\": 0.005234,\n    \"samples_ns\": [ 25900898556, 25887699492, 25891091286 ],\n    \"samples_ts\": [ 19.7677, 19.7777, 19.7751 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:16:16Z\",\n    \"avg_ns\": 119736869633,\n    \"stddev_ns\": 135605232,\n    \"avg_ts\": 4.276047,\n    \"stddev_ts\": 0.004845,\n    \"samples_ns\": [ 119586430492, 119774475009, 119849703398 ],\n    \"samples_ts\": [ 4.28142, 4.2747, 4.27202 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:14:33Z",
+          "avg_ns": 25893229778,
+          "stddev_ns": 6854464,
+          "avg_ts": 19.773509,
+          "stddev_ts": 0.005234,
+          "samples_ns": [
+            25900898556,
+            25887699492,
+            25891091286
+          ],
+          "samples_ts": [
+            19.7677,
+            19.7777,
+            19.7751
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:16:16Z",
+          "avg_ns": 119736869633,
+          "stddev_ns": 135605232,
+          "avg_ts": 4.276047,
+          "stddev_ts": 0.004845,
+          "samples_ns": [
+            119586430492,
+            119774475009,
+            119849703398
+          ],
+          "samples_ts": [
+            4.28142,
+            4.2747,
+            4.27202
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 987
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:24:13.318634+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:22:18Z\",\n    \"avg_ns\": 6440912978,\n    \"stddev_ns\": 2043588,\n    \"avg_ts\": 19.872960,\n    \"stddev_ts\": 0.006306,\n    \"samples_ns\": [ 6441426161, 6438661710, 6442651063 ],\n    \"samples_ts\": [ 19.8714, 19.8799, 19.8676 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:22:44Z\",\n    \"avg_ns\": 29532461821,\n    \"stddev_ns\": 16013583,\n    \"avg_ts\": 4.334215,\n    \"stddev_ts\": 0.002350,\n    \"samples_ns\": [ 29516896970, 29531602643, 29548885852 ],\n    \"samples_ts\": [ 4.3365, 4.33434, 4.3318 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:22:18Z",
+          "avg_ns": 6440912978,
+          "stddev_ns": 2043588,
+          "avg_ts": 19.87296,
+          "stddev_ts": 0.006306,
+          "samples_ns": [
+            6441426161,
+            6438661710,
+            6442651063
+          ],
+          "samples_ts": [
+            19.8714,
+            19.8799,
+            19.8676
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:22:44Z",
+          "avg_ns": 29532461821,
+          "stddev_ns": 16013583,
+          "avg_ts": 4.334215,
+          "stddev_ts": 0.00235,
+          "samples_ns": [
+            29516896970,
+            29531602643,
+            29548885852
+          ],
+          "samples_ts": [
+            4.3365,
+            4.33434,
+            4.3318
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 988
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:30:40.457335+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:24:15Z\",\n    \"avg_ns\": 6455051568,\n    \"stddev_ns\": 19900818,\n    \"avg_ts\": 19.829556,\n    \"stddev_ts\": 0.061030,\n    \"samples_ns\": [ 6445425960, 6477935159, 6441793585 ],\n    \"samples_ts\": [ 19.859, 19.7594, 19.8702 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:24:41Z\",\n    \"avg_ns\": 119612426022,\n    \"stddev_ns\": 139355783,\n    \"avg_ts\": 4.280496,\n    \"stddev_ts\": 0.004988,\n    \"samples_ns\": [ 119465004214, 119630278691, 119741995161 ],\n    \"samples_ts\": [ 4.28577, 4.27985, 4.27586 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:24:15Z",
+          "avg_ns": 6455051568,
+          "stddev_ns": 19900818,
+          "avg_ts": 19.829556,
+          "stddev_ts": 0.06103,
+          "samples_ns": [
+            6445425960,
+            6477935159,
+            6441793585
+          ],
+          "samples_ts": [
+            19.859,
+            19.7594,
+            19.8702
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:24:41Z",
+          "avg_ns": 119612426022,
+          "stddev_ns": 139355783,
+          "avg_ts": 4.280496,
+          "stddev_ts": 0.004988,
+          "samples_ns": [
+            119465004214,
+            119630278691,
+            119741995161
+          ],
+          "samples_ts": [
+            4.28577,
+            4.27985,
+            4.27586
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 989
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:33:56.223763+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:30:42Z\",\n    \"avg_ns\": 26084080147,\n    \"stddev_ns\": 9364730,\n    \"avg_ts\": 19.628833,\n    \"stddev_ts\": 0.007047,\n    \"samples_ns\": [ 26087281579, 26091422877, 26073535986 ],\n    \"samples_ts\": [ 19.6264, 19.6233, 19.6368 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:32:26Z\",\n    \"avg_ns\": 29636156317,\n    \"stddev_ns\": 27233993,\n    \"avg_ts\": 4.319051,\n    \"stddev_ts\": 0.003969,\n    \"samples_ns\": [ 29638764355, 29661991926, 29607712671 ],\n    \"samples_ts\": [ 4.31867, 4.31529, 4.3232 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:30:42Z",
+          "avg_ns": 26084080147,
+          "stddev_ns": 9364730,
+          "avg_ts": 19.628833,
+          "stddev_ts": 0.007047,
+          "samples_ns": [
+            26087281579,
+            26091422877,
+            26073535986
+          ],
+          "samples_ts": [
+            19.6264,
+            19.6233,
+            19.6368
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:32:26Z",
+          "avg_ns": 29636156317,
+          "stddev_ns": 27233993,
+          "avg_ts": 4.319051,
+          "stddev_ts": 0.003969,
+          "samples_ns": [
+            29638764355,
+            29661991926,
+            29607712671
+          ],
+          "samples_ts": [
+            4.31867,
+            4.31529,
+            4.3232
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 990
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:41:41.606192+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:33:58Z\",\n    \"avg_ns\": 26106828305,\n    \"stddev_ns\": 8455082,\n    \"avg_ts\": 19.611729,\n    \"stddev_ts\": 0.006350,\n    \"samples_ns\": [ 26112626522, 26110727853, 26097130542 ],\n    \"samples_ts\": [ 19.6074, 19.6088, 19.619 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:35:42Z\",\n    \"avg_ns\": 119474886999,\n    \"stddev_ns\": 184309679,\n    \"avg_ts\": 4.285426,\n    \"stddev_ts\": 0.006609,\n    \"samples_ns\": [ 119449972600, 119670386252, 119304302146 ],\n    \"samples_ts\": [ 4.28631, 4.27842, 4.29155 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:33:58Z",
+          "avg_ns": 26106828305,
+          "stddev_ns": 8455082,
+          "avg_ts": 19.611729,
+          "stddev_ts": 0.00635,
+          "samples_ns": [
+            26112626522,
+            26110727853,
+            26097130542
+          ],
+          "samples_ts": [
+            19.6074,
+            19.6088,
+            19.619
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:35:42Z",
+          "avg_ns": 119474886999,
+          "stddev_ns": 184309679,
+          "avg_ts": 4.285426,
+          "stddev_ts": 0.006609,
+          "samples_ns": [
+            119449972600,
+            119670386252,
+            119304302146
+          ],
+          "samples_ts": [
+            4.28631,
+            4.27842,
+            4.29155
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 991
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:43:38.681094+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:41:43Z\",\n    \"avg_ns\": 6455347610,\n    \"stddev_ns\": 18175939,\n    \"avg_ts\": 19.828626,\n    \"stddev_ts\": 0.055753,\n    \"samples_ns\": [ 6448408272, 6475970778, 6441663781 ],\n    \"samples_ts\": [ 19.8499, 19.7654, 19.8706 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:42:09Z\",\n    \"avg_ns\": 29589701232,\n    \"stddev_ns\": 11510775,\n    \"avg_ts\": 4.325830,\n    \"stddev_ts\": 0.001682,\n    \"samples_ns\": [ 29583952434, 29582197219, 29602954043 ],\n    \"samples_ts\": [ 4.32667, 4.32693, 4.32389 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:41:43Z",
+          "avg_ns": 6455347610,
+          "stddev_ns": 18175939,
+          "avg_ts": 19.828626,
+          "stddev_ts": 0.055753,
+          "samples_ns": [
+            6448408272,
+            6475970778,
+            6441663781
+          ],
+          "samples_ts": [
+            19.8499,
+            19.7654,
+            19.8706
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:42:09Z",
+          "avg_ns": 29589701232,
+          "stddev_ns": 11510775,
+          "avg_ts": 4.32583,
+          "stddev_ts": 0.001682,
+          "samples_ns": [
+            29583952434,
+            29582197219,
+            29602954043
+          ],
+          "samples_ts": [
+            4.32667,
+            4.32693,
+            4.32389
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 992
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:50:05.076258+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:43:40Z\",\n    \"avg_ns\": 6449517325,\n    \"stddev_ns\": 10868918,\n    \"avg_ts\": 19.846484,\n    \"stddev_ts\": 0.033417,\n    \"samples_ns\": [ 6461882746, 6445191825, 6441477405 ],\n    \"samples_ts\": [ 19.8085, 19.8598, 19.8712 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:44:06Z\",\n    \"avg_ns\": 119361069109,\n    \"stddev_ns\": 152594344,\n    \"avg_ts\": 4.289510,\n    \"stddev_ts\": 0.005480,\n    \"samples_ns\": [ 119285765340, 119260764331, 119536677656 ],\n    \"samples_ts\": [ 4.29221, 4.29311, 4.2832 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:43:40Z",
+          "avg_ns": 6449517325,
+          "stddev_ns": 10868918,
+          "avg_ts": 19.846484,
+          "stddev_ts": 0.033417,
+          "samples_ns": [
+            6461882746,
+            6445191825,
+            6441477405
+          ],
+          "samples_ts": [
+            19.8085,
+            19.8598,
+            19.8712
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:44:06Z",
+          "avg_ns": 119361069109,
+          "stddev_ns": 152594344,
+          "avg_ts": 4.28951,
+          "stddev_ts": 0.00548,
+          "samples_ns": [
+            119285765340,
+            119260764331,
+            119536677656
+          ],
+          "samples_ts": [
+            4.29221,
+            4.29311,
+            4.2832
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 993
+    },
+    {
+      "timestamp_utc": "2025-12-10T05:53:22.132240+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:50:07Z\",\n    \"avg_ns\": 26453633204,\n    \"stddev_ns\": 13126444,\n    \"avg_ts\": 19.354622,\n    \"stddev_ts\": 0.009605,\n    \"samples_ns\": [ 26438509704, 26460352090, 26462037820 ],\n    \"samples_ts\": [ 19.3657, 19.3497, 19.3485 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:51:53Z\",\n    \"avg_ns\": 29585555613,\n    \"stddev_ns\": 44070996,\n    \"avg_ts\": 4.326442,\n    \"stddev_ts\": 0.006446,\n    \"samples_ns\": [ 29628247118, 29540224454, 29588195268 ],\n    \"samples_ts\": [ 4.3202, 4.33307, 4.32605 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:50:07Z",
+          "avg_ns": 26453633204,
+          "stddev_ns": 13126444,
+          "avg_ts": 19.354622,
+          "stddev_ts": 0.009605,
+          "samples_ns": [
+            26438509704,
+            26460352090,
+            26462037820
+          ],
+          "samples_ts": [
+            19.3657,
+            19.3497,
+            19.3485
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:51:53Z",
+          "avg_ns": 29585555613,
+          "stddev_ns": 44070996,
+          "avg_ts": 4.326442,
+          "stddev_ts": 0.006446,
+          "samples_ns": [
+            29628247118,
+            29540224454,
+            29588195268
+          ],
+          "samples_ts": [
+            4.3202,
+            4.33307,
+            4.32605
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 994
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:01:09.719644+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:53:24Z\",\n    \"avg_ns\": 26441260239,\n    \"stddev_ns\": 2790583,\n    \"avg_ts\": 19.363676,\n    \"stddev_ts\": 0.002037,\n    \"samples_ns\": [ 26440675380, 26438818089, 26444287250 ],\n    \"samples_ts\": [ 19.3641, 19.3655, 19.3615 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T05:55:10Z\",\n    \"avg_ns\": 119754691206,\n    \"stddev_ns\": 86051323,\n    \"avg_ts\": 4.275408,\n    \"stddev_ts\": 0.003071,\n    \"samples_ns\": [ 119690435555, 119852456206, 119721181857 ],\n    \"samples_ts\": [ 4.2777, 4.27192, 4.2766 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:53:24Z",
+          "avg_ns": 26441260239,
+          "stddev_ns": 2790583,
+          "avg_ts": 19.363676,
+          "stddev_ts": 0.002037,
+          "samples_ns": [
+            26440675380,
+            26438818089,
+            26444287250
+          ],
+          "samples_ts": [
+            19.3641,
+            19.3655,
+            19.3615
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T05:55:10Z",
+          "avg_ns": 119754691206,
+          "stddev_ns": 86051323,
+          "avg_ts": 4.275408,
+          "stddev_ts": 0.003071,
+          "samples_ns": [
+            119690435555,
+            119852456206,
+            119721181857
+          ],
+          "samples_ts": [
+            4.2777,
+            4.27192,
+            4.2766
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 995
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:03:06.902840+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:01:11Z\",\n    \"avg_ns\": 6439757116,\n    \"stddev_ns\": 740079,\n    \"avg_ts\": 19.876526,\n    \"stddev_ts\": 0.002257,\n    \"samples_ns\": [ 6438919401, 6440268239, 6440083710 ],\n    \"samples_ts\": [ 19.8791, 19.8749, 19.8755 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:01:37Z\",\n    \"avg_ns\": 29616112140,\n    \"stddev_ns\": 60154704,\n    \"avg_ts\": 4.321984,\n    \"stddev_ts\": 0.008788,\n    \"samples_ns\": [ 29547067665, 29657209624, 29644059131 ],\n    \"samples_ts\": [ 4.33207, 4.31598, 4.3179 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:01:11Z",
+          "avg_ns": 6439757116,
+          "stddev_ns": 740079,
+          "avg_ts": 19.876526,
+          "stddev_ts": 0.002257,
+          "samples_ns": [
+            6438919401,
+            6440268239,
+            6440083710
+          ],
+          "samples_ts": [
+            19.8791,
+            19.8749,
+            19.8755
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:01:37Z",
+          "avg_ns": 29616112140,
+          "stddev_ns": 60154704,
+          "avg_ts": 4.321984,
+          "stddev_ts": 0.008788,
+          "samples_ns": [
+            29547067665,
+            29657209624,
+            29644059131
+          ],
+          "samples_ts": [
+            4.33207,
+            4.31598,
+            4.3179
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 996
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:09:34.307420+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:03:09Z\",\n    \"avg_ns\": 6463042623,\n    \"stddev_ns\": 10233284,\n    \"avg_ts\": 19.804947,\n    \"stddev_ts\": 0.031329,\n    \"samples_ns\": [ 6474828490, 6457880208, 6456419172 ],\n    \"samples_ts\": [ 19.7689, 19.8207, 19.8252 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:03:34Z\",\n    \"avg_ns\": 119672118897,\n    \"stddev_ns\": 20530175,\n    \"avg_ts\": 4.278357,\n    \"stddev_ts\": 0.000734,\n    \"samples_ns\": [ 119650091544, 119690721044, 119675544103 ],\n    \"samples_ts\": [ 4.27914, 4.27769, 4.27823 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:03:09Z",
+          "avg_ns": 6463042623,
+          "stddev_ns": 10233284,
+          "avg_ts": 19.804947,
+          "stddev_ts": 0.031329,
+          "samples_ns": [
+            6474828490,
+            6457880208,
+            6456419172
+          ],
+          "samples_ts": [
+            19.7689,
+            19.8207,
+            19.8252
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:03:34Z",
+          "avg_ns": 119672118897,
+          "stddev_ns": 20530175,
+          "avg_ts": 4.278357,
+          "stddev_ts": 0.000734,
+          "samples_ns": [
+            119650091544,
+            119690721044,
+            119675544103
+          ],
+          "samples_ts": [
+            4.27914,
+            4.27769,
+            4.27823
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 997
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:12:49.228610+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:09:36Z\",\n    \"avg_ns\": 25909921644,\n    \"stddev_ns\": 2491281,\n    \"avg_ts\": 19.760770,\n    \"stddev_ts\": 0.001896,\n    \"samples_ns\": [ 25909463383, 25912604970, 25907696580 ],\n    \"samples_ts\": [ 19.7611, 19.7587, 19.7625 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:11:20Z\",\n    \"avg_ns\": 29579642830,\n    \"stddev_ns\": 3111463,\n    \"avg_ts\": 4.327300,\n    \"stddev_ts\": 0.000454,\n    \"samples_ns\": [ 29577351495, 29578398109, 29583178887 ],\n    \"samples_ts\": [ 4.32764, 4.32748, 4.32678 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:09:36Z",
+          "avg_ns": 25909921644,
+          "stddev_ns": 2491281,
+          "avg_ts": 19.76077,
+          "stddev_ts": 0.001896,
+          "samples_ns": [
+            25909463383,
+            25912604970,
+            25907696580
+          ],
+          "samples_ts": [
+            19.7611,
+            19.7587,
+            19.7625
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:11:20Z",
+          "avg_ns": 29579642830,
+          "stddev_ns": 3111463,
+          "avg_ts": 4.3273,
+          "stddev_ts": 0.000454,
+          "samples_ns": [
+            29577351495,
+            29578398109,
+            29583178887
+          ],
+          "samples_ts": [
+            4.32764,
+            4.32748,
+            4.32678
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 998
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:20:33.761821+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:12:51Z\",\n    \"avg_ns\": 25910076842,\n    \"stddev_ns\": 7956780,\n    \"avg_ts\": 19.760653,\n    \"stddev_ts\": 0.006069,\n    \"samples_ns\": [ 25917180667, 25901478931, 25911570928 ],\n    \"samples_ts\": [ 19.7552, 19.7672, 19.7595 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:14:35Z\",\n    \"avg_ns\": 119455439677,\n    \"stddev_ns\": 190119494,\n    \"avg_ts\": 4.286124,\n    \"stddev_ts\": 0.006819,\n    \"samples_ns\": [ 119422508585, 119283937566, 119659872882 ],\n    \"samples_ts\": [ 4.2873, 4.29228, 4.27879 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:12:51Z",
+          "avg_ns": 25910076842,
+          "stddev_ns": 7956780,
+          "avg_ts": 19.760653,
+          "stddev_ts": 0.006069,
+          "samples_ns": [
+            25917180667,
+            25901478931,
+            25911570928
+          ],
+          "samples_ts": [
+            19.7552,
+            19.7672,
+            19.7595
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:14:35Z",
+          "avg_ns": 119455439677,
+          "stddev_ns": 190119494,
+          "avg_ts": 4.286124,
+          "stddev_ts": 0.006819,
+          "samples_ns": [
+            119422508585,
+            119283937566,
+            119659872882
+          ],
+          "samples_ts": [
+            4.2873,
+            4.29228,
+            4.27879
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 999
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:22:30.849802+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:20:35Z\",\n    \"avg_ns\": 6453098549,\n    \"stddev_ns\": 15497596,\n    \"avg_ts\": 19.835509,\n    \"stddev_ts\": 0.047577,\n    \"samples_ns\": [ 6470740724, 6441683649, 6446871276 ],\n    \"samples_ts\": [ 19.7814, 19.8706, 19.8546 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:21:01Z\",\n    \"avg_ns\": 29577683987,\n    \"stddev_ns\": 29674272,\n    \"avg_ts\": 4.327590,\n    \"stddev_ts\": 0.004339,\n    \"samples_ns\": [ 29559462102, 29611925282, 29561664577 ],\n    \"samples_ts\": [ 4.33025, 4.32258, 4.32993 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:20:35Z",
+          "avg_ns": 6453098549,
+          "stddev_ns": 15497596,
+          "avg_ts": 19.835509,
+          "stddev_ts": 0.047577,
+          "samples_ns": [
+            6470740724,
+            6441683649,
+            6446871276
+          ],
+          "samples_ts": [
+            19.7814,
+            19.8706,
+            19.8546
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:21:01Z",
+          "avg_ns": 29577683987,
+          "stddev_ns": 29674272,
+          "avg_ts": 4.32759,
+          "stddev_ts": 0.004339,
+          "samples_ns": [
+            29559462102,
+            29611925282,
+            29561664577
+          ],
+          "samples_ts": [
+            4.33025,
+            4.32258,
+            4.32993
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1000
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:28:57.898883+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:22:32Z\",\n    \"avg_ns\": 6552766887,\n    \"stddev_ns\": 186395539,\n    \"avg_ts\": 19.544107,\n    \"stddev_ts\": 0.547016,\n    \"samples_ns\": [ 6438168353, 6452288845, 6767843463 ],\n    \"samples_ts\": [ 19.8814, 19.8379, 18.913 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:22:59Z\",\n    \"avg_ns\": 119469920420,\n    \"stddev_ns\": 120868994,\n    \"avg_ts\": 4.285600,\n    \"stddev_ts\": 0.004333,\n    \"samples_ns\": [ 119416069293, 119608355427, 119385336540 ],\n    \"samples_ts\": [ 4.28753, 4.28064, 4.28863 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:22:32Z",
+          "avg_ns": 6552766887,
+          "stddev_ns": 186395539,
+          "avg_ts": 19.544107,
+          "stddev_ts": 0.547016,
+          "samples_ns": [
+            6438168353,
+            6452288845,
+            6767843463
+          ],
+          "samples_ts": [
+            19.8814,
+            19.8379,
+            18.913
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:22:59Z",
+          "avg_ns": 119469920420,
+          "stddev_ns": 120868994,
+          "avg_ts": 4.2856,
+          "stddev_ts": 0.004333,
+          "samples_ns": [
+            119416069293,
+            119608355427,
+            119385336540
+          ],
+          "samples_ts": [
+            4.28753,
+            4.28064,
+            4.28863
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1001
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:32:13.737286+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:29:00Z\",\n    \"avg_ns\": 26103084214,\n    \"stddev_ns\": 22620029,\n    \"avg_ts\": 19.614550,\n    \"stddev_ts\": 0.016998,\n    \"samples_ns\": [ 26104599664, 26124907258, 26079745722 ],\n    \"samples_ts\": [ 19.6134, 19.5982, 19.6321 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:30:44Z\",\n    \"avg_ns\": 29627558807,\n    \"stddev_ns\": 50003175,\n    \"avg_ts\": 4.320310,\n    \"stddev_ts\": 0.007295,\n    \"samples_ns\": [ 29573185022, 29671566406, 29637924993 ],\n    \"samples_ts\": [ 4.32825, 4.31389, 4.31879 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:29:00Z",
+          "avg_ns": 26103084214,
+          "stddev_ns": 22620029,
+          "avg_ts": 19.61455,
+          "stddev_ts": 0.016998,
+          "samples_ns": [
+            26104599664,
+            26124907258,
+            26079745722
+          ],
+          "samples_ts": [
+            19.6134,
+            19.5982,
+            19.6321
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:30:44Z",
+          "avg_ns": 29627558807,
+          "stddev_ns": 50003175,
+          "avg_ts": 4.32031,
+          "stddev_ts": 0.007295,
+          "samples_ns": [
+            29573185022,
+            29671566406,
+            29637924993
+          ],
+          "samples_ts": [
+            4.32825,
+            4.31389,
+            4.31879
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1002
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:39:59.874441+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:32:15Z\",\n    \"avg_ns\": 26090830583,\n    \"stddev_ns\": 19807813,\n    \"avg_ts\": 19.623760,\n    \"stddev_ts\": 0.014898,\n    \"samples_ns\": [ 26110186112, 26070601993, 26091703646 ],\n    \"samples_ts\": [ 19.6092, 19.639, 19.6231 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:34:00Z\",\n    \"avg_ns\": 119760472752,\n    \"stddev_ns\": 265540414,\n    \"avg_ts\": 4.275214,\n    \"stddev_ts\": 0.009479,\n    \"samples_ns\": [ 119761553642, 120025470622, 119494393994 ],\n    \"samples_ts\": [ 4.27516, 4.26576, 4.28472 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:32:15Z",
+          "avg_ns": 26090830583,
+          "stddev_ns": 19807813,
+          "avg_ts": 19.62376,
+          "stddev_ts": 0.014898,
+          "samples_ns": [
+            26110186112,
+            26070601993,
+            26091703646
+          ],
+          "samples_ts": [
+            19.6092,
+            19.639,
+            19.6231
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:34:00Z",
+          "avg_ns": 119760472752,
+          "stddev_ns": 265540414,
+          "avg_ts": 4.275214,
+          "stddev_ts": 0.009479,
+          "samples_ns": [
+            119761553642,
+            120025470622,
+            119494393994
+          ],
+          "samples_ts": [
+            4.27516,
+            4.26576,
+            4.28472
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1003
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:41:57.059258+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:40:02Z\",\n    \"avg_ns\": 6452750875,\n    \"stddev_ns\": 16422609,\n    \"avg_ts\": 19.836587,\n    \"stddev_ts\": 0.050410,\n    \"samples_ns\": [ 6442823960, 6443722141, 6471706526 ],\n    \"samples_ts\": [ 19.8671, 19.8643, 19.7784 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:40:27Z\",\n    \"avg_ns\": 29612172617,\n    \"stddev_ns\": 55953943,\n    \"avg_ts\": 4.322557,\n    \"stddev_ts\": 0.008177,\n    \"samples_ns\": [ 29643447909, 29645495943, 29547574001 ],\n    \"samples_ts\": [ 4.31799, 4.31769, 4.332 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:40:02Z",
+          "avg_ns": 6452750875,
+          "stddev_ns": 16422609,
+          "avg_ts": 19.836587,
+          "stddev_ts": 0.05041,
+          "samples_ns": [
+            6442823960,
+            6443722141,
+            6471706526
+          ],
+          "samples_ts": [
+            19.8671,
+            19.8643,
+            19.7784
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:40:27Z",
+          "avg_ns": 29612172617,
+          "stddev_ns": 55953943,
+          "avg_ts": 4.322557,
+          "stddev_ts": 0.008177,
+          "samples_ns": [
+            29643447909,
+            29645495943,
+            29547574001
+          ],
+          "samples_ts": [
+            4.31799,
+            4.31769,
+            4.332
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1004
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:48:25.151732+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:41:59Z\",\n    \"avg_ns\": 6443462655,\n    \"stddev_ns\": 10980135,\n    \"avg_ts\": 19.865134,\n    \"stddev_ts\": 0.033836,\n    \"samples_ns\": [ 6433671946, 6455334391, 6441381628 ],\n    \"samples_ts\": [ 19.8953, 19.8286, 19.8715 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:42:25Z\",\n    \"avg_ns\": 119919899574,\n    \"stddev_ns\": 253759480,\n    \"avg_ts\": 4.269529,\n    \"stddev_ts\": 0.009025,\n    \"samples_ns\": [ 119812749794, 119737290071, 120209658857 ],\n    \"samples_ts\": [ 4.27333, 4.27603, 4.25923 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:41:59Z",
+          "avg_ns": 6443462655,
+          "stddev_ns": 10980135,
+          "avg_ts": 19.865134,
+          "stddev_ts": 0.033836,
+          "samples_ns": [
+            6433671946,
+            6455334391,
+            6441381628
+          ],
+          "samples_ts": [
+            19.8953,
+            19.8286,
+            19.8715
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:42:25Z",
+          "avg_ns": 119919899574,
+          "stddev_ns": 253759480,
+          "avg_ts": 4.269529,
+          "stddev_ts": 0.009025,
+          "samples_ns": [
+            119812749794,
+            119737290071,
+            120209658857
+          ],
+          "samples_ts": [
+            4.27333,
+            4.27603,
+            4.25923
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1005
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:51:42.531034+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:48:27Z\",\n    \"avg_ns\": 26465184244,\n    \"stddev_ns\": 9458368,\n    \"avg_ts\": 19.346173,\n    \"stddev_ts\": 0.006913,\n    \"samples_ns\": [ 26475696797, 26462492223, 26457363712 ],\n    \"samples_ts\": [ 19.3385, 19.3481, 19.3519 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:50:13Z\",\n    \"avg_ns\": 29653063067,\n    \"stddev_ns\": 90688243,\n    \"avg_ts\": 4.316613,\n    \"stddev_ts\": 0.013179,\n    \"samples_ns\": [ 29593035786, 29757386008, 29608767408 ],\n    \"samples_ts\": [ 4.32534, 4.30145, 4.32304 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:48:27Z",
+          "avg_ns": 26465184244,
+          "stddev_ns": 9458368,
+          "avg_ts": 19.346173,
+          "stddev_ts": 0.006913,
+          "samples_ns": [
+            26475696797,
+            26462492223,
+            26457363712
+          ],
+          "samples_ts": [
+            19.3385,
+            19.3481,
+            19.3519
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:50:13Z",
+          "avg_ns": 29653063067,
+          "stddev_ns": 90688243,
+          "avg_ts": 4.316613,
+          "stddev_ts": 0.013179,
+          "samples_ns": [
+            29593035786,
+            29757386008,
+            29608767408
+          ],
+          "samples_ts": [
+            4.32534,
+            4.30145,
+            4.32304
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1006
+    },
+    {
+      "timestamp_utc": "2025-12-10T06:59:29.907087+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:51:44Z\",\n    \"avg_ns\": 26466486527,\n    \"stddev_ns\": 9883760,\n    \"avg_ts\": 19.345222,\n    \"stddev_ts\": 0.007222,\n    \"samples_ns\": [ 26457214580, 26465364114, 26476880889 ],\n    \"samples_ts\": [ 19.352, 19.346, 19.3376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n    \"model_type\": \"gemma3 4B Q4_K - Medium\",\n    \"model_size\": 2483352832,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T06:53:30Z\",\n    \"avg_ns\": 119654388823,\n    \"stddev_ns\": 242915973,\n    \"avg_ts\": 4.279002,\n    \"stddev_ts\": 0.008679,\n    \"samples_ns\": [ 119929263891, 119468569234, 119565333345 ],\n    \"samples_ts\": [ 4.26918, 4.28565, 4.28218 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:51:44Z",
+          "avg_ns": 26466486527,
+          "stddev_ns": 9883760,
+          "avg_ts": 19.345222,
+          "stddev_ts": 0.007222,
+          "samples_ns": [
+            26457214580,
+            26465364114,
+            26476880889
+          ],
+          "samples_ts": [
+            19.352,
+            19.346,
+            19.3376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+          "model_type": "gemma3 4B Q4_K - Medium",
+          "model_size": 2483352832,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T06:53:30Z",
+          "avg_ns": 119654388823,
+          "stddev_ns": 242915973,
+          "avg_ts": 4.279002,
+          "stddev_ts": 0.008679,
+          "samples_ns": [
+            119929263891,
+            119468569234,
+            119565333345
+          ],
+          "samples_ts": [
+            4.26918,
+            4.28565,
+            4.28218
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q4_K_M",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1007
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:04:52.678948+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:00:27Z\",\n    \"avg_ns\": 28354921650,\n    \"stddev_ns\": 20104684,\n    \"avg_ts\": 4.514209,\n    \"stddev_ts\": 0.003200,\n    \"samples_ns\": [ 28377742727, 28347194703, 28339827521 ],\n    \"samples_ts\": [ 4.51058, 4.51544, 4.51661 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:02:27Z\",\n    \"avg_ns\": 48323447678,\n    \"stddev_ns\": 15676503,\n    \"avg_ts\": 2.648818,\n    \"stddev_ts\": 0.000859,\n    \"samples_ns\": [ 48324053948, 48338810710, 48307478377 ],\n    \"samples_ts\": [ 2.64878, 2.64798, 2.64969 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:00:27Z",
+          "avg_ns": 28354921650,
+          "stddev_ns": 20104684,
+          "avg_ts": 4.514209,
+          "stddev_ts": 0.0032,
+          "samples_ns": [
+            28377742727,
+            28347194703,
+            28339827521
+          ],
+          "samples_ts": [
+            4.51058,
+            4.51544,
+            4.51661
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:02:27Z",
+          "avg_ns": 48323447678,
+          "stddev_ns": 15676503,
+          "avg_ts": 2.648818,
+          "stddev_ts": 0.000859,
+          "samples_ns": [
+            48324053948,
+            48338810710,
+            48307478377
+          ],
+          "samples_ts": [
+            2.64878,
+            2.64798,
+            2.64969
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1008
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:16:31.690798+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:04:53Z\",\n    \"avg_ns\": 28288675998,\n    \"stddev_ns\": 890264,\n    \"avg_ts\": 4.524779,\n    \"stddev_ts\": 0.000137,\n    \"samples_ns\": [ 28289559138, 28287845788, 28288623070 ],\n    \"samples_ts\": [ 4.52464, 4.52491, 4.52479 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:06:47Z\",\n    \"avg_ns\": 194716588535,\n    \"stddev_ns\": 17888675,\n    \"avg_ts\": 2.629463,\n    \"stddev_ts\": 0.000241,\n    \"samples_ns\": [ 194737229674, 194706537831, 194705998102 ],\n    \"samples_ts\": [ 2.62918, 2.6296, 2.62961 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:04:53Z",
+          "avg_ns": 28288675998,
+          "stddev_ns": 890264,
+          "avg_ts": 4.524779,
+          "stddev_ts": 0.000137,
+          "samples_ns": [
+            28289559138,
+            28287845788,
+            28288623070
+          ],
+          "samples_ts": [
+            4.52464,
+            4.52491,
+            4.52479
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:06:47Z",
+          "avg_ns": 194716588535,
+          "stddev_ns": 17888675,
+          "avg_ts": 2.629463,
+          "stddev_ts": 0.000241,
+          "samples_ns": [
+            194737229674,
+            194706537831,
+            194705998102
+          ],
+          "samples_ts": [
+            2.62918,
+            2.6296,
+            2.62961
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1009
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:26:31.070568+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:16:32Z\",\n    \"avg_ns\": 113862299153,\n    \"stddev_ns\": 2035882,\n    \"avg_ts\": 4.496660,\n    \"stddev_ts\": 0.000079,\n    \"samples_ns\": [ 113863827744, 113863044332, 113860025384 ],\n    \"samples_ts\": [ 4.4966, 4.49663, 4.49675 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:24:08Z\",\n    \"avg_ns\": 47432570678,\n    \"stddev_ns\": 18083628,\n    \"avg_ts\": 2.698568,\n    \"stddev_ts\": 0.001029,\n    \"samples_ns\": [ 47452427271, 47428237414, 47417047349 ],\n    \"samples_ts\": [ 2.69744, 2.69881, 2.69945 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:16:32Z",
+          "avg_ns": 113862299153,
+          "stddev_ns": 2035882,
+          "avg_ts": 4.49666,
+          "stddev_ts": 7.9e-05,
+          "samples_ns": [
+            113863827744,
+            113863044332,
+            113860025384
+          ],
+          "samples_ts": [
+            4.4966,
+            4.49663,
+            4.49675
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:24:08Z",
+          "avg_ns": 47432570678,
+          "stddev_ns": 18083628,
+          "avg_ts": 2.698568,
+          "stddev_ts": 0.001029,
+          "samples_ns": [
+            47452427271,
+            47428237414,
+            47417047349
+          ],
+          "samples_ts": [
+            2.69744,
+            2.69881,
+            2.69945
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1010
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:43:48.234139+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:26:32Z\",\n    \"avg_ns\": 113896241818,\n    \"stddev_ns\": 2372456,\n    \"avg_ts\": 4.495320,\n    \"stddev_ts\": 0.000093,\n    \"samples_ns\": [ 113898710986, 113894036635, 113895977834 ],\n    \"samples_ts\": [ 4.49522, 4.49541, 4.49533 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:34:07Z\",\n    \"avg_ns\": 193320552066,\n    \"stddev_ns\": 5959073,\n    \"avg_ts\": 2.648451,\n    \"stddev_ts\": 0.000082,\n    \"samples_ns\": [ 193327340328, 193318132706, 193316183164 ],\n    \"samples_ts\": [ 2.64836, 2.64848, 2.64851 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:26:32Z",
+          "avg_ns": 113896241818,
+          "stddev_ns": 2372456,
+          "avg_ts": 4.49532,
+          "stddev_ts": 9.3e-05,
+          "samples_ns": [
+            113898710986,
+            113894036635,
+            113895977834
+          ],
+          "samples_ts": [
+            4.49522,
+            4.49541,
+            4.49533
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:34:07Z",
+          "avg_ns": 193320552066,
+          "stddev_ns": 5959073,
+          "avg_ts": 2.648451,
+          "stddev_ts": 8.2e-05,
+          "samples_ns": [
+            193327340328,
+            193318132706,
+            193316183164
+          ],
+          "samples_ts": [
+            2.64836,
+            2.64848,
+            2.64851
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1011
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:48:07.906449+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:43:49Z\",\n    \"avg_ns\": 28285784344,\n    \"stddev_ns\": 289772,\n    \"avg_ts\": 4.525241,\n    \"stddev_ts\": 0.000026,\n    \"samples_ns\": [ 28285830675, 28285921762, 28285600597 ],\n    \"samples_ts\": [ 4.52523, 4.52522, 4.52527 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:45:42Z\",\n    \"avg_ns\": 48294141795,\n    \"stddev_ns\": 5758460,\n    \"avg_ts\": 2.650425,\n    \"stddev_ts\": 0.000316,\n    \"samples_ns\": [ 48300770368, 48290429901, 48291225117 ],\n    \"samples_ts\": [ 2.65006, 2.65063, 2.65059 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:43:49Z",
+          "avg_ns": 28285784344,
+          "stddev_ns": 289772,
+          "avg_ts": 4.525241,
+          "stddev_ts": 2.6e-05,
+          "samples_ns": [
+            28285830675,
+            28285921762,
+            28285600597
+          ],
+          "samples_ts": [
+            4.52523,
+            4.52522,
+            4.52527
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:45:42Z",
+          "avg_ns": 48294141795,
+          "stddev_ns": 5758460,
+          "avg_ts": 2.650425,
+          "stddev_ts": 0.000316,
+          "samples_ns": [
+            48300770368,
+            48290429901,
+            48291225117
+          ],
+          "samples_ts": [
+            2.65006,
+            2.65063,
+            2.65059
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1012
+    },
+    {
+      "timestamp_utc": "2025-12-10T07:59:40.402349+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:48:08Z\",\n    \"avg_ns\": 28306082787,\n    \"stddev_ns\": 913077,\n    \"avg_ts\": 4.521996,\n    \"stddev_ts\": 0.000141,\n    \"samples_ns\": [ 28305941258, 28307026522, 28305280583 ],\n    \"samples_ts\": [ 4.52202, 4.52185, 4.52212 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:50:02Z\",\n    \"avg_ns\": 192554860831,\n    \"stddev_ns\": 1938616,\n    \"avg_ts\": 2.658982,\n    \"stddev_ts\": 0.000027,\n    \"samples_ns\": [ 192552643629, 192556236352, 192555702512 ],\n    \"samples_ts\": [ 2.65901, 2.65896, 2.65897 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:48:08Z",
+          "avg_ns": 28306082787,
+          "stddev_ns": 913077,
+          "avg_ts": 4.521996,
+          "stddev_ts": 0.000141,
+          "samples_ns": [
+            28305941258,
+            28307026522,
+            28305280583
+          ],
+          "samples_ts": [
+            4.52202,
+            4.52185,
+            4.52212
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:50:02Z",
+          "avg_ns": 192554860831,
+          "stddev_ns": 1938616,
+          "avg_ts": 2.658982,
+          "stddev_ts": 2.7e-05,
+          "samples_ns": [
+            192552643629,
+            192556236352,
+            192555702512
+          ],
+          "samples_ts": [
+            2.65901,
+            2.65896,
+            2.65897
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1013
+    },
+    {
+      "timestamp_utc": "2025-12-10T08:09:41.151995+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T07:59:41Z\",\n    \"avg_ns\": 114317990189,\n    \"stddev_ns\": 1221122,\n    \"avg_ts\": 4.478735,\n    \"stddev_ts\": 0.000044,\n    \"samples_ns\": [ 114319280153, 114317224788, 114317465628 ],\n    \"samples_ts\": [ 4.47868, 4.47877, 4.47876 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:07:18Z\",\n    \"avg_ns\": 47273823084,\n    \"stddev_ns\": 10297396,\n    \"avg_ts\": 2.707630,\n    \"stddev_ts\": 0.000589,\n    \"samples_ns\": [ 47285700223, 47268261374, 47267507657 ],\n    \"samples_ts\": [ 2.70695, 2.70795, 2.70799 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T07:59:41Z",
+          "avg_ns": 114317990189,
+          "stddev_ns": 1221122,
+          "avg_ts": 4.478735,
+          "stddev_ts": 4.4e-05,
+          "samples_ns": [
+            114319280153,
+            114317224788,
+            114317465628
+          ],
+          "samples_ts": [
+            4.47868,
+            4.47877,
+            4.47876
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:07:18Z",
+          "avg_ns": 47273823084,
+          "stddev_ns": 10297396,
+          "avg_ts": 2.70763,
+          "stddev_ts": 0.000589,
+          "samples_ns": [
+            47285700223,
+            47268261374,
+            47267507657
+          ],
+          "samples_ts": [
+            2.70695,
+            2.70795,
+            2.70799
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1014
+    },
+    {
+      "timestamp_utc": "2025-12-10T08:26:56.582489+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:09:42Z\",\n    \"avg_ns\": 114313171466,\n    \"stddev_ns\": 2129645,\n    \"avg_ts\": 4.478924,\n    \"stddev_ts\": 0.000081,\n    \"samples_ns\": [ 114311180842, 114315322124, 114313011434 ],\n    \"samples_ts\": [ 4.479, 4.47884, 4.47893 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:17:19Z\",\n    \"avg_ns\": 192186723699,\n    \"stddev_ns\": 4645096,\n    \"avg_ts\": 2.664076,\n    \"stddev_ts\": 0.000064,\n    \"samples_ns\": [ 192187279703, 192181867413, 192191023983 ],\n    \"samples_ts\": [ 2.66407, 2.66414, 2.66402 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:09:42Z",
+          "avg_ns": 114313171466,
+          "stddev_ns": 2129645,
+          "avg_ts": 4.478924,
+          "stddev_ts": 8.1e-05,
+          "samples_ns": [
+            114311180842,
+            114315322124,
+            114313011434
+          ],
+          "samples_ts": [
+            4.479,
+            4.47884,
+            4.47893
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:17:19Z",
+          "avg_ns": 192186723699,
+          "stddev_ns": 4645096,
+          "avg_ts": 2.664076,
+          "stddev_ts": 6.4e-05,
+          "samples_ns": [
+            192187279703,
+            192181867413,
+            192191023983
+          ],
+          "samples_ts": [
+            2.66407,
+            2.66414,
+            2.66402
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1015
+    },
+    {
+      "timestamp_utc": "2025-12-10T08:31:15.998691+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:26:57Z\",\n    \"avg_ns\": 28314092830,\n    \"stddev_ns\": 722905,\n    \"avg_ts\": 4.520717,\n    \"stddev_ts\": 0.000115,\n    \"samples_ns\": [ 28313591396, 28313765607, 28314921487 ],\n    \"samples_ts\": [ 4.5208, 4.52077, 4.52058 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:28:50Z\",\n    \"avg_ns\": 48186277713,\n    \"stddev_ns\": 3827496,\n    \"avg_ts\": 2.656358,\n    \"stddev_ts\": 0.000210,\n    \"samples_ns\": [ 48190654106, 48184523973, 48183655062 ],\n    \"samples_ts\": [ 2.65612, 2.65645, 2.6565 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:26:57Z",
+          "avg_ns": 28314092830,
+          "stddev_ns": 722905,
+          "avg_ts": 4.520717,
+          "stddev_ts": 0.000115,
+          "samples_ns": [
+            28313591396,
+            28313765607,
+            28314921487
+          ],
+          "samples_ts": [
+            4.5208,
+            4.52077,
+            4.52058
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:28:50Z",
+          "avg_ns": 48186277713,
+          "stddev_ns": 3827496,
+          "avg_ts": 2.656358,
+          "stddev_ts": 0.00021,
+          "samples_ns": [
+            48190654106,
+            48184523973,
+            48183655062
+          ],
+          "samples_ts": [
+            2.65612,
+            2.65645,
+            2.6565
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1016
+    },
+    {
+      "timestamp_utc": "2025-12-10T08:42:50.638153+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:31:17Z\",\n    \"avg_ns\": 28312856345,\n    \"stddev_ns\": 1248399,\n    \"avg_ts\": 4.520914,\n    \"stddev_ts\": 0.000199,\n    \"samples_ns\": [ 28312958757, 28314050384, 28311559894 ],\n    \"samples_ts\": [ 4.5209, 4.52072, 4.52112 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:33:10Z\",\n    \"avg_ns\": 193261954999,\n    \"stddev_ns\": 3167436,\n    \"avg_ts\": 2.649254,\n    \"stddev_ts\": 0.000043,\n    \"samples_ns\": [ 193263481316, 193264070281, 193258313400 ],\n    \"samples_ts\": [ 2.64923, 2.64922, 2.6493 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:31:17Z",
+          "avg_ns": 28312856345,
+          "stddev_ns": 1248399,
+          "avg_ts": 4.520914,
+          "stddev_ts": 0.000199,
+          "samples_ns": [
+            28312958757,
+            28314050384,
+            28311559894
+          ],
+          "samples_ts": [
+            4.5209,
+            4.52072,
+            4.52112
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:33:10Z",
+          "avg_ns": 193261954999,
+          "stddev_ns": 3167436,
+          "avg_ts": 2.649254,
+          "stddev_ts": 4.3e-05,
+          "samples_ns": [
+            193263481316,
+            193264070281,
+            193258313400
+          ],
+          "samples_ts": [
+            2.64923,
+            2.64922,
+            2.6493
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1017
+    },
+    {
+      "timestamp_utc": "2025-12-10T08:53:01.982055+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:42:51Z\",\n    \"avg_ns\": 116653152357,\n    \"stddev_ns\": 1811918,\n    \"avg_ts\": 4.389080,\n    \"stddev_ts\": 0.000068,\n    \"samples_ns\": [ 116651072916, 116654392056, 116653992099 ],\n    \"samples_ts\": [ 4.38916, 4.38903, 4.38905 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:50:38Z\",\n    \"avg_ns\": 47702418967,\n    \"stddev_ns\": 11057097,\n    \"avg_ts\": 2.683302,\n    \"stddev_ts\": 0.000622,\n    \"samples_ns\": [ 47714266780, 47700615909, 47692374212 ],\n    \"samples_ts\": [ 2.68264, 2.6834, 2.68387 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:42:51Z",
+          "avg_ns": 116653152357,
+          "stddev_ns": 1811918,
+          "avg_ts": 4.38908,
+          "stddev_ts": 6.8e-05,
+          "samples_ns": [
+            116651072916,
+            116654392056,
+            116653992099
+          ],
+          "samples_ts": [
+            4.38916,
+            4.38903,
+            4.38905
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:50:38Z",
+          "avg_ns": 47702418967,
+          "stddev_ns": 11057097,
+          "avg_ts": 2.683302,
+          "stddev_ts": 0.000622,
+          "samples_ns": [
+            47714266780,
+            47700615909,
+            47692374212
+          ],
+          "samples_ts": [
+            2.68264,
+            2.6834,
+            2.68387
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1018
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:10:26.970063+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T08:53:03Z\",\n    \"avg_ns\": 116561920921,\n    \"stddev_ns\": 10859049,\n    \"avg_ts\": 4.392515,\n    \"stddev_ts\": 0.000409,\n    \"samples_ns\": [ 116549546406, 116569793220, 116566423139 ],\n    \"samples_ts\": [ 4.39298, 4.39222, 4.39235 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:00:49Z\",\n    \"avg_ns\": 192374335005,\n    \"stddev_ns\": 12515998,\n    \"avg_ts\": 2.661478,\n    \"stddev_ts\": 0.000173,\n    \"samples_ns\": [ 192388763289, 192366549613, 192367692114 ],\n    \"samples_ts\": [ 2.66128, 2.66159, 2.66157 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T08:53:03Z",
+          "avg_ns": 116561920921,
+          "stddev_ns": 10859049,
+          "avg_ts": 4.392515,
+          "stddev_ts": 0.000409,
+          "samples_ns": [
+            116549546406,
+            116569793220,
+            116566423139
+          ],
+          "samples_ts": [
+            4.39298,
+            4.39222,
+            4.39235
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:00:49Z",
+          "avg_ns": 192374335005,
+          "stddev_ns": 12515998,
+          "avg_ts": 2.661478,
+          "stddev_ts": 0.000173,
+          "samples_ns": [
+            192388763289,
+            192366549613,
+            192367692114
+          ],
+          "samples_ts": [
+            2.66128,
+            2.66159,
+            2.66157
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1019
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:14:46.193916+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:10:28Z\",\n    \"avg_ns\": 28287051715,\n    \"stddev_ns\": 806536,\n    \"avg_ts\": 4.525039,\n    \"stddev_ts\": 0.000129,\n    \"samples_ns\": [ 28287644338, 28286133230, 28287377577 ],\n    \"samples_ts\": [ 4.52494, 4.52519, 4.52499 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:12:21Z\",\n    \"avg_ns\": 48151180555,\n    \"stddev_ns\": 1298045,\n    \"avg_ts\": 2.658294,\n    \"stddev_ts\": 0.000070,\n    \"samples_ns\": [ 48150270499, 48152619183, 48150651985 ],\n    \"samples_ts\": [ 2.65834, 2.65821, 2.65832 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:10:28Z",
+          "avg_ns": 28287051715,
+          "stddev_ns": 806536,
+          "avg_ts": 4.525039,
+          "stddev_ts": 0.000129,
+          "samples_ns": [
+            28287644338,
+            28286133230,
+            28287377577
+          ],
+          "samples_ts": [
+            4.52494,
+            4.52519,
+            4.52499
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:12:21Z",
+          "avg_ns": 48151180555,
+          "stddev_ns": 1298045,
+          "avg_ts": 2.658294,
+          "stddev_ts": 7e-05,
+          "samples_ns": [
+            48150270499,
+            48152619183,
+            48150651985
+          ],
+          "samples_ts": [
+            2.65834,
+            2.65821,
+            2.65832
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1020
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:26:22.042436+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:14:47Z\",\n    \"avg_ns\": 28310224744,\n    \"stddev_ns\": 3070694,\n    \"avg_ts\": 4.521335,\n    \"stddev_ts\": 0.000490,\n    \"samples_ns\": [ 28308574510, 28308332011, 28313767711 ],\n    \"samples_ts\": [ 4.5216, 4.52164, 4.52077 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:16:40Z\",\n    \"avg_ns\": 193642723186,\n    \"stddev_ns\": 11150906,\n    \"avg_ts\": 2.644045,\n    \"stddev_ts\": 0.000152,\n    \"samples_ns\": [ 193633193501, 193640002183, 193654973875 ],\n    \"samples_ts\": [ 2.64417, 2.64408, 2.64388 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:14:47Z",
+          "avg_ns": 28310224744,
+          "stddev_ns": 3070694,
+          "avg_ts": 4.521335,
+          "stddev_ts": 0.00049,
+          "samples_ns": [
+            28308574510,
+            28308332011,
+            28313767711
+          ],
+          "samples_ts": [
+            4.5216,
+            4.52164,
+            4.52077
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:16:40Z",
+          "avg_ns": 193642723186,
+          "stddev_ns": 11150906,
+          "avg_ts": 2.644045,
+          "stddev_ts": 0.000152,
+          "samples_ns": [
+            193633193501,
+            193640002183,
+            193654973875
+          ],
+          "samples_ts": [
+            2.64417,
+            2.64408,
+            2.64388
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1021
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:36:23.854063+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:26:23Z\",\n    \"avg_ns\": 113823717532,\n    \"stddev_ns\": 1546363,\n    \"avg_ts\": 4.498184,\n    \"stddev_ts\": 0.000060,\n    \"samples_ns\": [ 113822005992, 113824856832, 113824289773 ],\n    \"samples_ts\": [ 4.49825, 4.49814, 4.49816 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:33:58Z\",\n    \"avg_ns\": 48298799822,\n    \"stddev_ns\": 1581608,\n    \"avg_ts\": 2.650169,\n    \"stddev_ts\": 0.000085,\n    \"samples_ns\": [ 48300582254, 48297759977, 48298057237 ],\n    \"samples_ts\": [ 2.65007, 2.65023, 2.65021 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:26:23Z",
+          "avg_ns": 113823717532,
+          "stddev_ns": 1546363,
+          "avg_ts": 4.498184,
+          "stddev_ts": 6e-05,
+          "samples_ns": [
+            113822005992,
+            113824856832,
+            113824289773
+          ],
+          "samples_ts": [
+            4.49825,
+            4.49814,
+            4.49816
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:33:58Z",
+          "avg_ns": 48298799822,
+          "stddev_ns": 1581608,
+          "avg_ts": 2.650169,
+          "stddev_ts": 8.5e-05,
+          "samples_ns": [
+            48300582254,
+            48297759977,
+            48298057237
+          ],
+          "samples_ts": [
+            2.65007,
+            2.65023,
+            2.65021
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1022
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:53:46.389431+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:36:24Z\",\n    \"avg_ns\": 113773208891,\n    \"stddev_ns\": 5133625,\n    \"avg_ts\": 4.500181,\n    \"stddev_ts\": 0.000203,\n    \"samples_ns\": [ 113778934302, 113769016199, 113771676172 ],\n    \"samples_ts\": [ 4.49995, 4.50035, 4.50024 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:44:00Z\",\n    \"avg_ns\": 195266568804,\n    \"stddev_ns\": 13490958,\n    \"avg_ts\": 2.622057,\n    \"stddev_ts\": 0.000181,\n    \"samples_ns\": [ 195259438845, 195258146981, 195282120587 ],\n    \"samples_ts\": [ 2.62215, 2.62217, 2.62185 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:36:24Z",
+          "avg_ns": 113773208891,
+          "stddev_ns": 5133625,
+          "avg_ts": 4.500181,
+          "stddev_ts": 0.000203,
+          "samples_ns": [
+            113778934302,
+            113769016199,
+            113771676172
+          ],
+          "samples_ts": [
+            4.49995,
+            4.50035,
+            4.50024
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:44:00Z",
+          "avg_ns": 195266568804,
+          "stddev_ns": 13490958,
+          "avg_ts": 2.622057,
+          "stddev_ts": 0.000181,
+          "samples_ns": [
+            195259438845,
+            195258146981,
+            195282120587
+          ],
+          "samples_ts": [
+            2.62215,
+            2.62217,
+            2.62185
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1023
+    },
+    {
+      "timestamp_utc": "2025-12-10T09:58:05.639569+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:53:47Z\",\n    \"avg_ns\": 28287918358,\n    \"stddev_ns\": 1632037,\n    \"avg_ts\": 4.524900,\n    \"stddev_ts\": 0.000261,\n    \"samples_ns\": [ 28289760293, 28287342358, 28286652423 ],\n    \"samples_ts\": [ 4.52461, 4.52499, 4.5251 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:55:40Z\",\n    \"avg_ns\": 48150041697,\n    \"stddev_ns\": 3441524,\n    \"avg_ts\": 2.658357,\n    \"stddev_ts\": 0.000189,\n    \"samples_ns\": [ 48153993527, 48148253065, 48147878501 ],\n    \"samples_ts\": [ 2.65814, 2.65846, 2.65848 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:53:47Z",
+          "avg_ns": 28287918358,
+          "stddev_ns": 1632037,
+          "avg_ts": 4.5249,
+          "stddev_ts": 0.000261,
+          "samples_ns": [
+            28289760293,
+            28287342358,
+            28286652423
+          ],
+          "samples_ts": [
+            4.52461,
+            4.52499,
+            4.5251
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:55:40Z",
+          "avg_ns": 48150041697,
+          "stddev_ns": 3441524,
+          "avg_ts": 2.658357,
+          "stddev_ts": 0.000189,
+          "samples_ns": [
+            48153993527,
+            48148253065,
+            48147878501
+          ],
+          "samples_ts": [
+            2.65814,
+            2.65846,
+            2.65848
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1024
+    },
+    {
+      "timestamp_utc": "2025-12-10T10:09:40.291890+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:58:06Z\",\n    \"avg_ns\": 28291193667,\n    \"stddev_ns\": 3171800,\n    \"avg_ts\": 4.524376,\n    \"stddev_ts\": 0.000506,\n    \"samples_ns\": [ 28288968876, 28294814342, 28289797785 ],\n    \"samples_ts\": [ 4.52473, 4.5238, 4.5246 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T09:59:59Z\",\n    \"avg_ns\": 193292733446,\n    \"stddev_ns\": 4148573,\n    \"avg_ts\": 2.648832,\n    \"stddev_ts\": 0.000056,\n    \"samples_ns\": [ 193297454681, 193290046428, 193290699231 ],\n    \"samples_ts\": [ 2.64877, 2.64887, 2.64886 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:58:06Z",
+          "avg_ns": 28291193667,
+          "stddev_ns": 3171800,
+          "avg_ts": 4.524376,
+          "stddev_ts": 0.000506,
+          "samples_ns": [
+            28288968876,
+            28294814342,
+            28289797785
+          ],
+          "samples_ts": [
+            4.52473,
+            4.5238,
+            4.5246
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T09:59:59Z",
+          "avg_ns": 193292733446,
+          "stddev_ns": 4148573,
+          "avg_ts": 2.648832,
+          "stddev_ts": 5.6e-05,
+          "samples_ns": [
+            193297454681,
+            193290046428,
+            193290699231
+          ],
+          "samples_ts": [
+            2.64877,
+            2.64887,
+            2.64886
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1025
+    },
+    {
+      "timestamp_utc": "2025-12-10T10:19:42.052741+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:09:41Z\",\n    \"avg_ns\": 114541715605,\n    \"stddev_ns\": 1425789,\n    \"avg_ts\": 4.469987,\n    \"stddev_ts\": 0.000052,\n    \"samples_ns\": [ 114540897418, 114543265629, 114540983770 ],\n    \"samples_ts\": [ 4.47002, 4.46993, 4.47002 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:17:19Z\",\n    \"avg_ns\": 47334401021,\n    \"stddev_ns\": 6544553,\n    \"avg_ts\": 2.704164,\n    \"stddev_ts\": 0.000373,\n    \"samples_ns\": [ 47341534351, 47328695848, 47332972866 ],\n    \"samples_ts\": [ 2.70376, 2.70449, 2.70425 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:09:41Z",
+          "avg_ns": 114541715605,
+          "stddev_ns": 1425789,
+          "avg_ts": 4.469987,
+          "stddev_ts": 5.2e-05,
+          "samples_ns": [
+            114540897418,
+            114543265629,
+            114540983770
+          ],
+          "samples_ts": [
+            4.47002,
+            4.46993,
+            4.47002
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:17:19Z",
+          "avg_ns": 47334401021,
+          "stddev_ns": 6544553,
+          "avg_ts": 2.704164,
+          "stddev_ts": 0.000373,
+          "samples_ns": [
+            47341534351,
+            47328695848,
+            47332972866
+          ],
+          "samples_ts": [
+            2.70376,
+            2.70449,
+            2.70425
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1026
+    },
+    {
+      "timestamp_utc": "2025-12-10T10:36:55.931333+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:19:43Z\",\n    \"avg_ns\": 114431303235,\n    \"stddev_ns\": 3156704,\n    \"avg_ts\": 4.474300,\n    \"stddev_ts\": 0.000122,\n    \"samples_ns\": [ 114433595429, 114432564483, 114427749795 ],\n    \"samples_ts\": [ 4.47421, 4.47425, 4.47444 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:27:20Z\",\n    \"avg_ns\": 191508751758,\n    \"stddev_ns\": 5315694,\n    \"avg_ts\": 2.673507,\n    \"stddev_ts\": 0.000074,\n    \"samples_ns\": [ 191511032914, 191502715179, 191512507183 ],\n    \"samples_ts\": [ 2.67348, 2.67359, 2.67345 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:19:43Z",
+          "avg_ns": 114431303235,
+          "stddev_ns": 3156704,
+          "avg_ts": 4.4743,
+          "stddev_ts": 0.000122,
+          "samples_ns": [
+            114433595429,
+            114432564483,
+            114427749795
+          ],
+          "samples_ts": [
+            4.47421,
+            4.47425,
+            4.47444
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:27:20Z",
+          "avg_ns": 191508751758,
+          "stddev_ns": 5315694,
+          "avg_ts": 2.673507,
+          "stddev_ts": 7.4e-05,
+          "samples_ns": [
+            191511032914,
+            191502715179,
+            191512507183
+          ],
+          "samples_ts": [
+            2.67348,
+            2.67359,
+            2.67345
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1027
+    },
+    {
+      "timestamp_utc": "2025-12-10T10:41:15.086833+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:36:57Z\",\n    \"avg_ns\": 28288246419,\n    \"stddev_ns\": 2360818,\n    \"avg_ts\": 4.524847,\n    \"stddev_ts\": 0.000378,\n    \"samples_ns\": [ 28290759655, 28287904239, 28286075363 ],\n    \"samples_ts\": [ 4.52445, 4.5249, 4.52519 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:38:50Z\",\n    \"avg_ns\": 48124612925,\n    \"stddev_ns\": 909863,\n    \"avg_ts\": 2.659762,\n    \"stddev_ts\": 0.000050,\n    \"samples_ns\": [ 48123616323, 48125399218, 48124823234 ],\n    \"samples_ts\": [ 2.65982, 2.65972, 2.65975 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:36:57Z",
+          "avg_ns": 28288246419,
+          "stddev_ns": 2360818,
+          "avg_ts": 4.524847,
+          "stddev_ts": 0.000378,
+          "samples_ns": [
+            28290759655,
+            28287904239,
+            28286075363
+          ],
+          "samples_ts": [
+            4.52445,
+            4.5249,
+            4.52519
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:38:50Z",
+          "avg_ns": 48124612925,
+          "stddev_ns": 909863,
+          "avg_ts": 2.659762,
+          "stddev_ts": 5e-05,
+          "samples_ns": [
+            48123616323,
+            48125399218,
+            48124823234
+          ],
+          "samples_ts": [
+            2.65982,
+            2.65972,
+            2.65975
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1028
+    },
+    {
+      "timestamp_utc": "2025-12-10T10:52:52.975907+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:41:16Z\",\n    \"avg_ns\": 28285355368,\n    \"stddev_ns\": 2539615,\n    \"avg_ts\": 4.525310,\n    \"stddev_ts\": 0.000406,\n    \"samples_ns\": [ 28287520067, 28285986262, 28282559775 ],\n    \"samples_ts\": [ 4.52496, 4.52521, 4.52576 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:43:09Z\",\n    \"avg_ns\": 194374181917,\n    \"stddev_ns\": 2026890,\n    \"avg_ts\": 2.634095,\n    \"stddev_ts\": 0.000027,\n    \"samples_ns\": [ 194372687532, 194376489044, 194373369175 ],\n    \"samples_ts\": [ 2.63411, 2.63406, 2.63411 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:41:16Z",
+          "avg_ns": 28285355368,
+          "stddev_ns": 2539615,
+          "avg_ts": 4.52531,
+          "stddev_ts": 0.000406,
+          "samples_ns": [
+            28287520067,
+            28285986262,
+            28282559775
+          ],
+          "samples_ts": [
+            4.52496,
+            4.52521,
+            4.52576
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:43:09Z",
+          "avg_ns": 194374181917,
+          "stddev_ns": 2026890,
+          "avg_ts": 2.634095,
+          "stddev_ts": 2.7e-05,
+          "samples_ns": [
+            194372687532,
+            194376489044,
+            194373369175
+          ],
+          "samples_ts": [
+            2.63411,
+            2.63406,
+            2.63411
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1029
+    },
+    {
+      "timestamp_utc": "2025-12-10T11:03:03.763592+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T10:52:54Z\",\n    \"avg_ns\": 116666356459,\n    \"stddev_ns\": 3664039,\n    \"avg_ts\": 4.388583,\n    \"stddev_ts\": 0.000138,\n    \"samples_ns\": [ 116669740512, 116662465268, 116666863597 ],\n    \"samples_ts\": [ 4.38846, 4.38873, 4.38856 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:00:41Z\",\n    \"avg_ns\": 47398431503,\n    \"stddev_ns\": 6920746,\n    \"avg_ts\": 2.700511,\n    \"stddev_ts\": 0.000394,\n    \"samples_ns\": [ 47406239919, 47395967234, 47393087358 ],\n    \"samples_ts\": [ 2.70007, 2.70065, 2.70082 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T10:52:54Z",
+          "avg_ns": 116666356459,
+          "stddev_ns": 3664039,
+          "avg_ts": 4.388583,
+          "stddev_ts": 0.000138,
+          "samples_ns": [
+            116669740512,
+            116662465268,
+            116666863597
+          ],
+          "samples_ts": [
+            4.38846,
+            4.38873,
+            4.38856
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:00:41Z",
+          "avg_ns": 47398431503,
+          "stddev_ns": 6920746,
+          "avg_ts": 2.700511,
+          "stddev_ts": 0.000394,
+          "samples_ns": [
+            47406239919,
+            47395967234,
+            47393087358
+          ],
+          "samples_ts": [
+            2.70007,
+            2.70065,
+            2.70082
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1030
+    },
+    {
+      "timestamp_utc": "2025-12-10T11:20:25.296650+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:03:04Z\",\n    \"avg_ns\": 116378777847,\n    \"stddev_ns\": 4442439,\n    \"avg_ts\": 4.399428,\n    \"stddev_ts\": 0.000167,\n    \"samples_ns\": [ 116378805627, 116374334701, 116383193214 ],\n    \"samples_ts\": [ 4.39943, 4.3996, 4.39926 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:10:50Z\",\n    \"avg_ns\": 191457124205,\n    \"stddev_ns\": 7818383,\n    \"avg_ts\": 2.674228,\n    \"stddev_ts\": 0.000109,\n    \"samples_ns\": [ 191463857191, 191458929210, 191448586216 ],\n    \"samples_ts\": [ 2.67413, 2.6742, 2.67435 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:03:04Z",
+          "avg_ns": 116378777847,
+          "stddev_ns": 4442439,
+          "avg_ts": 4.399428,
+          "stddev_ts": 0.000167,
+          "samples_ns": [
+            116378805627,
+            116374334701,
+            116383193214
+          ],
+          "samples_ts": [
+            4.39943,
+            4.3996,
+            4.39926
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:10:50Z",
+          "avg_ns": 191457124205,
+          "stddev_ns": 7818383,
+          "avg_ts": 2.674228,
+          "stddev_ts": 0.000109,
+          "samples_ns": [
+            191463857191,
+            191458929210,
+            191448586216
+          ],
+          "samples_ts": [
+            2.67413,
+            2.6742,
+            2.67435
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1031
+    },
+    {
+      "timestamp_utc": "2025-12-10T11:24:41.780316+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:20:26Z\",\n    \"avg_ns\": 28287988518,\n    \"stddev_ns\": 2073775,\n    \"avg_ts\": 4.524889,\n    \"stddev_ts\": 0.000331,\n    \"samples_ns\": [ 28286532550, 28287078707, 28290354298 ],\n    \"samples_ts\": [ 4.52512, 4.52503, 4.52451 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:22:19Z\",\n    \"avg_ns\": 47246607016,\n    \"stddev_ns\": 1980480,\n    \"avg_ts\": 2.709189,\n    \"stddev_ts\": 0.000112,\n    \"samples_ns\": [ 47248850881, 47245257894, 47245712275 ],\n    \"samples_ts\": [ 2.70906, 2.70927, 2.70924 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:20:26Z",
+          "avg_ns": 28287988518,
+          "stddev_ns": 2073775,
+          "avg_ts": 4.524889,
+          "stddev_ts": 0.000331,
+          "samples_ns": [
+            28286532550,
+            28287078707,
+            28290354298
+          ],
+          "samples_ts": [
+            4.52512,
+            4.52503,
+            4.52451
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:22:19Z",
+          "avg_ns": 47246607016,
+          "stddev_ns": 1980480,
+          "avg_ts": 2.709189,
+          "stddev_ts": 0.000112,
+          "samples_ns": [
+            47248850881,
+            47245257894,
+            47245712275
+          ],
+          "samples_ts": [
+            2.70906,
+            2.70927,
+            2.70924
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1032
+    },
+    {
+      "timestamp_utc": "2025-12-10T11:36:10.349612+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:24:42Z\",\n    \"avg_ns\": 28283319145,\n    \"stddev_ns\": 735242,\n    \"avg_ts\": 4.525636,\n    \"stddev_ts\": 0.000115,\n    \"samples_ns\": [ 28284138350, 28282814799, 28283004287 ],\n    \"samples_ts\": [ 4.5255, 4.52572, 4.52569 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:26:36Z\",\n    \"avg_ns\": 191276328777,\n    \"stddev_ns\": 5641674,\n    \"avg_ts\": 2.676756,\n    \"stddev_ts\": 0.000079,\n    \"samples_ns\": [ 191280383264, 191278717340, 191269885727 ],\n    \"samples_ts\": [ 2.6767, 2.67672, 2.67685 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:24:42Z",
+          "avg_ns": 28283319145,
+          "stddev_ns": 735242,
+          "avg_ts": 4.525636,
+          "stddev_ts": 0.000115,
+          "samples_ns": [
+            28284138350,
+            28282814799,
+            28283004287
+          ],
+          "samples_ts": [
+            4.5255,
+            4.52572,
+            4.52569
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:26:36Z",
+          "avg_ns": 191276328777,
+          "stddev_ns": 5641674,
+          "avg_ts": 2.676756,
+          "stddev_ts": 7.9e-05,
+          "samples_ns": [
+            191280383264,
+            191278717340,
+            191269885727
+          ],
+          "samples_ts": [
+            2.6767,
+            2.67672,
+            2.67685
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1033
+    },
+    {
+      "timestamp_utc": "2025-12-10T11:46:10.570300+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:36:11Z\",\n    \"avg_ns\": 113920390899,\n    \"stddev_ns\": 1975687,\n    \"avg_ts\": 4.494367,\n    \"stddev_ts\": 0.000078,\n    \"samples_ns\": [ 113918409241, 113920402895, 113922360561 ],\n    \"samples_ts\": [ 4.49444, 4.49437, 4.49429 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:43:47Z\",\n    \"avg_ns\": 47635933837,\n    \"stddev_ns\": 4185712,\n    \"avg_ts\": 2.687047,\n    \"stddev_ts\": 0.000235,\n    \"samples_ns\": [ 47640577255, 47634731918, 47632492340 ],\n    \"samples_ts\": [ 2.68679, 2.68711, 2.68724 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:36:11Z",
+          "avg_ns": 113920390899,
+          "stddev_ns": 1975687,
+          "avg_ts": 4.494367,
+          "stddev_ts": 7.8e-05,
+          "samples_ns": [
+            113918409241,
+            113920402895,
+            113922360561
+          ],
+          "samples_ts": [
+            4.49444,
+            4.49437,
+            4.49429
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:43:47Z",
+          "avg_ns": 47635933837,
+          "stddev_ns": 4185712,
+          "avg_ts": 2.687047,
+          "stddev_ts": 0.000235,
+          "samples_ns": [
+            47640577255,
+            47634731918,
+            47632492340
+          ],
+          "samples_ts": [
+            2.68679,
+            2.68711,
+            2.68724
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1034
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:03:32.926992+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:46:11Z\",\n    \"avg_ns\": 113856060609,\n    \"stddev_ns\": 1731990,\n    \"avg_ts\": 4.496906,\n    \"stddev_ts\": 0.000066,\n    \"samples_ns\": [ 113854475507, 113855911000, 113857795322 ],\n    \"samples_ts\": [ 4.49697, 4.49691, 4.49684 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T11:53:47Z\",\n    \"avg_ns\": 195105152625,\n    \"stddev_ns\": 2275296,\n    \"avg_ts\": 2.624226,\n    \"stddev_ts\": 0.000029,\n    \"samples_ns\": [ 195103392721, 195107602232, 195104462924 ],\n    \"samples_ts\": [ 2.62425, 2.62419, 2.62424 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:46:11Z",
+          "avg_ns": 113856060609,
+          "stddev_ns": 1731990,
+          "avg_ts": 4.496906,
+          "stddev_ts": 6.6e-05,
+          "samples_ns": [
+            113854475507,
+            113855911000,
+            113857795322
+          ],
+          "samples_ts": [
+            4.49697,
+            4.49691,
+            4.49684
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T11:53:47Z",
+          "avg_ns": 195105152625,
+          "stddev_ns": 2275296,
+          "avg_ts": 2.624226,
+          "stddev_ts": 2.9e-05,
+          "samples_ns": [
+            195103392721,
+            195107602232,
+            195104462924
+          ],
+          "samples_ts": [
+            2.62425,
+            2.62419,
+            2.62424
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1035
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:07:49.605769+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:03:34Z\",\n    \"avg_ns\": 28281709732,\n    \"stddev_ns\": 1122853,\n    \"avg_ts\": 4.525893,\n    \"stddev_ts\": 0.000180,\n    \"samples_ns\": [ 28282956143, 28281395790, 28280777263 ],\n    \"samples_ts\": [ 4.52569, 4.52594, 4.52604 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:05:27Z\",\n    \"avg_ns\": 47317440243,\n    \"stddev_ns\": 3463735,\n    \"avg_ts\": 2.705134,\n    \"stddev_ts\": 0.000198,\n    \"samples_ns\": [ 47319651657, 47313456624, 47319212449 ],\n    \"samples_ts\": [ 2.70501, 2.70536, 2.70503 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:03:34Z",
+          "avg_ns": 28281709732,
+          "stddev_ns": 1122853,
+          "avg_ts": 4.525893,
+          "stddev_ts": 0.00018,
+          "samples_ns": [
+            28282956143,
+            28281395790,
+            28280777263
+          ],
+          "samples_ts": [
+            4.52569,
+            4.52594,
+            4.52604
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:05:27Z",
+          "avg_ns": 47317440243,
+          "stddev_ns": 3463735,
+          "avg_ts": 2.705134,
+          "stddev_ts": 0.000198,
+          "samples_ns": [
+            47319651657,
+            47313456624,
+            47319212449
+          ],
+          "samples_ts": [
+            2.70501,
+            2.70536,
+            2.70503
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1036
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:19:19.618345+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:07:50Z\",\n    \"avg_ns\": 28294889738,\n    \"stddev_ns\": 1003588,\n    \"avg_ts\": 4.523785,\n    \"stddev_ts\": 0.000158,\n    \"samples_ns\": [ 28295811352, 28295013624, 28293844239 ],\n    \"samples_ts\": [ 4.52364, 4.52377, 4.52395 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:09:43Z\",\n    \"avg_ns\": 191720234560,\n    \"stddev_ns\": 12902426,\n    \"avg_ts\": 2.670558,\n    \"stddev_ts\": 0.000180,\n    \"samples_ns\": [ 191727228874, 191705345200, 191728129606 ],\n    \"samples_ts\": [ 2.67046, 2.67077, 2.67045 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:07:50Z",
+          "avg_ns": 28294889738,
+          "stddev_ns": 1003588,
+          "avg_ts": 4.523785,
+          "stddev_ts": 0.000158,
+          "samples_ns": [
+            28295811352,
+            28295013624,
+            28293844239
+          ],
+          "samples_ts": [
+            4.52364,
+            4.52377,
+            4.52395
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:09:43Z",
+          "avg_ns": 191720234560,
+          "stddev_ns": 12902426,
+          "avg_ts": 2.670558,
+          "stddev_ts": 0.00018,
+          "samples_ns": [
+            191727228874,
+            191705345200,
+            191728129606
+          ],
+          "samples_ts": [
+            2.67046,
+            2.67077,
+            2.67045
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1037
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:29:21.385306+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:19:20Z\",\n    \"avg_ns\": 114303754945,\n    \"stddev_ns\": 2886462,\n    \"avg_ts\": 4.479293,\n    \"stddev_ts\": 0.000113,\n    \"samples_ns\": [ 114306558978, 114303913286, 114300792571 ],\n    \"samples_ts\": [ 4.47918, 4.47929, 4.47941 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:26:57Z\",\n    \"avg_ns\": 47641443797,\n    \"stddev_ns\": 4124705,\n    \"avg_ts\": 2.686736,\n    \"stddev_ts\": 0.000233,\n    \"samples_ns\": [ 47645986791, 47640410887, 47637933713 ],\n    \"samples_ts\": [ 2.68648, 2.68679, 2.68693 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:19:20Z",
+          "avg_ns": 114303754945,
+          "stddev_ns": 2886462,
+          "avg_ts": 4.479293,
+          "stddev_ts": 0.000113,
+          "samples_ns": [
+            114306558978,
+            114303913286,
+            114300792571
+          ],
+          "samples_ts": [
+            4.47918,
+            4.47929,
+            4.47941
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:26:57Z",
+          "avg_ns": 47641443797,
+          "stddev_ns": 4124705,
+          "avg_ts": 2.686736,
+          "stddev_ts": 0.000233,
+          "samples_ns": [
+            47645986791,
+            47640410887,
+            47637933713
+          ],
+          "samples_ts": [
+            2.68648,
+            2.68679,
+            2.68693
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1038
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:46:41.793867+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:29:22Z\",\n    \"avg_ns\": 114371590642,\n    \"stddev_ns\": 2649224,\n    \"avg_ts\": 4.476636,\n    \"stddev_ts\": 0.000103,\n    \"samples_ns\": [ 114374571701, 114369611251, 114370588975 ],\n    \"samples_ts\": [ 4.47652, 4.47671, 4.47668 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:37:00Z\",\n    \"avg_ns\": 193753516122,\n    \"stddev_ns\": 3032953,\n    \"avg_ts\": 2.642533,\n    \"stddev_ts\": 0.000041,\n    \"samples_ns\": [ 193756783129, 193750790003, 193752975234 ],\n    \"samples_ts\": [ 2.64249, 2.64257, 2.64254 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:29:22Z",
+          "avg_ns": 114371590642,
+          "stddev_ns": 2649224,
+          "avg_ts": 4.476636,
+          "stddev_ts": 0.000103,
+          "samples_ns": [
+            114374571701,
+            114369611251,
+            114370588975
+          ],
+          "samples_ts": [
+            4.47652,
+            4.47671,
+            4.47668
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:37:00Z",
+          "avg_ns": 193753516122,
+          "stddev_ns": 3032953,
+          "avg_ts": 2.642533,
+          "stddev_ts": 4.1e-05,
+          "samples_ns": [
+            193756783129,
+            193750790003,
+            193752975234
+          ],
+          "samples_ts": [
+            2.64249,
+            2.64257,
+            2.64254
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1039
+    },
+    {
+      "timestamp_utc": "2025-12-10T12:50:59.271454+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:46:42Z\",\n    \"avg_ns\": 28290007775,\n    \"stddev_ns\": 809400,\n    \"avg_ts\": 4.524566,\n    \"stddev_ts\": 0.000129,\n    \"samples_ns\": [ 28289161152, 28290088228, 28290773945 ],\n    \"samples_ts\": [ 4.5247, 4.52455, 4.52444 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:48:36Z\",\n    \"avg_ns\": 47571194923,\n    \"stddev_ns\": 3626578,\n    \"avg_ts\": 2.690704,\n    \"stddev_ts\": 0.000204,\n    \"samples_ns\": [ 47572898389, 47567044618, 47573641764 ],\n    \"samples_ts\": [ 2.69061, 2.69094, 2.69057 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:46:42Z",
+          "avg_ns": 28290007775,
+          "stddev_ns": 809400,
+          "avg_ts": 4.524566,
+          "stddev_ts": 0.000129,
+          "samples_ns": [
+            28289161152,
+            28290088228,
+            28290773945
+          ],
+          "samples_ts": [
+            4.5247,
+            4.52455,
+            4.52444
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:48:36Z",
+          "avg_ns": 47571194923,
+          "stddev_ns": 3626578,
+          "avg_ts": 2.690704,
+          "stddev_ts": 0.000204,
+          "samples_ns": [
+            47572898389,
+            47567044618,
+            47573641764
+          ],
+          "samples_ts": [
+            2.69061,
+            2.69094,
+            2.69057
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1040
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:02:28.110042+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:51:00Z\",\n    \"avg_ns\": 28289902729,\n    \"stddev_ns\": 605638,\n    \"avg_ts\": 4.524583,\n    \"stddev_ts\": 0.000093,\n    \"samples_ns\": [ 28290261292, 28290215467, 28289231429 ],\n    \"samples_ts\": [ 4.52453, 4.52453, 4.52469 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T12:52:53Z\",\n    \"avg_ns\": 191355000788,\n    \"stddev_ns\": 5302140,\n    \"avg_ts\": 2.675655,\n    \"stddev_ts\": 0.000074,\n    \"samples_ns\": [ 191358589644, 191348910690, 191357502030 ],\n    \"samples_ts\": [ 2.67561, 2.67574, 2.67562 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:51:00Z",
+          "avg_ns": 28289902729,
+          "stddev_ns": 605638,
+          "avg_ts": 4.524583,
+          "stddev_ts": 9.3e-05,
+          "samples_ns": [
+            28290261292,
+            28290215467,
+            28289231429
+          ],
+          "samples_ts": [
+            4.52453,
+            4.52453,
+            4.52469
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T12:52:53Z",
+          "avg_ns": 191355000788,
+          "stddev_ns": 5302140,
+          "avg_ts": 2.675655,
+          "stddev_ts": 7.4e-05,
+          "samples_ns": [
+            191358589644,
+            191348910690,
+            191357502030
+          ],
+          "samples_ts": [
+            2.67561,
+            2.67574,
+            2.67562
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1041
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:12:37.839489+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:02:29Z\",\n    \"avg_ns\": 116659303551,\n    \"stddev_ns\": 11314165,\n    \"avg_ts\": 4.388848,\n    \"stddev_ts\": 0.000426,\n    \"samples_ns\": [ 116672348180, 116652157879, 116653404594 ],\n    \"samples_ts\": [ 4.38836, 4.38912, 4.38907 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:10:15Z\",\n    \"avg_ns\": 47138758733,\n    \"stddev_ns\": 3667093,\n    \"avg_ts\": 2.715388,\n    \"stddev_ts\": 0.000211,\n    \"samples_ns\": [ 47142131039, 47134876228, 47139268934 ],\n    \"samples_ts\": [ 2.71519, 2.71561, 2.71536 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:02:29Z",
+          "avg_ns": 116659303551,
+          "stddev_ns": 11314165,
+          "avg_ts": 4.388848,
+          "stddev_ts": 0.000426,
+          "samples_ns": [
+            116672348180,
+            116652157879,
+            116653404594
+          ],
+          "samples_ts": [
+            4.38836,
+            4.38912,
+            4.38907
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:10:15Z",
+          "avg_ns": 47138758733,
+          "stddev_ns": 3667093,
+          "avg_ts": 2.715388,
+          "stddev_ts": 0.000211,
+          "samples_ns": [
+            47142131039,
+            47134876228,
+            47139268934
+          ],
+          "samples_ts": [
+            2.71519,
+            2.71561,
+            2.71536
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1042
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:30:02.598675+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:12:38Z\",\n    \"avg_ns\": 116702220876,\n    \"stddev_ns\": 2140996,\n    \"avg_ts\": 4.387234,\n    \"stddev_ts\": 0.000078,\n    \"samples_ns\": [ 116700452937, 116701688424, 116704521269 ],\n    \"samples_ts\": [ 4.3873, 4.38725, 4.38715 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:20:25Z\",\n    \"avg_ns\": 192091203951,\n    \"stddev_ns\": 4126546,\n    \"avg_ts\": 2.665401,\n    \"stddev_ts\": 0.000057,\n    \"samples_ns\": [ 192095953190, 192089163889, 192088494774 ],\n    \"samples_ts\": [ 2.66533, 2.66543, 2.66544 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:12:38Z",
+          "avg_ns": 116702220876,
+          "stddev_ns": 2140996,
+          "avg_ts": 4.387234,
+          "stddev_ts": 7.8e-05,
+          "samples_ns": [
+            116700452937,
+            116701688424,
+            116704521269
+          ],
+          "samples_ts": [
+            4.3873,
+            4.38725,
+            4.38715
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:20:25Z",
+          "avg_ns": 192091203951,
+          "stddev_ns": 4126546,
+          "avg_ts": 2.665401,
+          "stddev_ts": 5.7e-05,
+          "samples_ns": [
+            192095953190,
+            192089163889,
+            192088494774
+          ],
+          "samples_ts": [
+            2.66533,
+            2.66543,
+            2.66544
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1043
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:33:08.345544+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:30:03Z\",\n    \"avg_ns\": 14293132065,\n    \"stddev_ns\": 2578842,\n    \"avg_ts\": 8.955350,\n    \"stddev_ts\": 0.001612,\n    \"samples_ns\": [ 14295693580, 14293155465, 14290547152 ],\n    \"samples_ts\": [ 8.95375, 8.95534, 8.95697 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:31:00Z\",\n    \"avg_ns\": 42335845241,\n    \"stddev_ns\": 11314892,\n    \"avg_ts\": 3.023443,\n    \"stddev_ts\": 0.000808,\n    \"samples_ns\": [ 42330644244, 42328065999, 42348825480 ],\n    \"samples_ts\": [ 3.02381, 3.024, 3.02252 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:30:03Z",
+          "avg_ns": 14293132065,
+          "stddev_ns": 2578842,
+          "avg_ts": 8.95535,
+          "stddev_ts": 0.001612,
+          "samples_ns": [
+            14295693580,
+            14293155465,
+            14290547152
+          ],
+          "samples_ts": [
+            8.95375,
+            8.95534,
+            8.95697
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:31:00Z",
+          "avg_ns": 42335845241,
+          "stddev_ns": 11314892,
+          "avg_ts": 3.023443,
+          "stddev_ts": 0.000808,
+          "samples_ns": [
+            42330644244,
+            42328065999,
+            42348825480
+          ],
+          "samples_ts": [
+            3.02381,
+            3.024,
+            3.02252
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1044
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:42:38.703945+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:33:09Z\",\n    \"avg_ns\": 14290712627,\n    \"stddev_ns\": 441507,\n    \"avg_ts\": 8.956866,\n    \"stddev_ts\": 0.000256,\n    \"samples_ns\": [ 14291168801, 14290585914, 14290383168 ],\n    \"samples_ts\": [ 8.95658, 8.95695, 8.95707 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:34:06Z\",\n    \"avg_ns\": 170541362859,\n    \"stddev_ns\": 63814690,\n    \"avg_ts\": 3.002205,\n    \"stddev_ts\": 0.001124,\n    \"samples_ns\": [ 170471640459, 170555578615, 170596869504 ],\n    \"samples_ts\": [ 3.00343, 3.00195, 3.00123 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:33:09Z",
+          "avg_ns": 14290712627,
+          "stddev_ns": 441507,
+          "avg_ts": 8.956866,
+          "stddev_ts": 0.000256,
+          "samples_ns": [
+            14291168801,
+            14290585914,
+            14290383168
+          ],
+          "samples_ts": [
+            8.95658,
+            8.95695,
+            8.95707
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:34:06Z",
+          "avg_ns": 170541362859,
+          "stddev_ns": 63814690,
+          "avg_ts": 3.002205,
+          "stddev_ts": 0.001124,
+          "samples_ns": [
+            170471640459,
+            170555578615,
+            170596869504
+          ],
+          "samples_ts": [
+            3.00343,
+            3.00195,
+            3.00123
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1045
+    },
+    {
+      "timestamp_utc": "2025-12-10T13:48:36.565805+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:42:39Z\",\n    \"avg_ns\": 57424236161,\n    \"stddev_ns\": 3742248,\n    \"avg_ts\": 8.916096,\n    \"stddev_ts\": 0.000581,\n    \"samples_ns\": [ 57421475717, 57422737254, 57428495512 ],\n    \"samples_ts\": [ 8.91652, 8.91633, 8.91543 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:46:29Z\",\n    \"avg_ns\": 42199267377,\n    \"stddev_ns\": 15388372,\n    \"avg_ts\": 3.033228,\n    \"stddev_ts\": 0.001106,\n    \"samples_ns\": [ 42213964195, 42183274880, 42200563058 ],\n    \"samples_ts\": [ 3.03217, 3.03438, 3.03313 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:42:39Z",
+          "avg_ns": 57424236161,
+          "stddev_ns": 3742248,
+          "avg_ts": 8.916096,
+          "stddev_ts": 0.000581,
+          "samples_ns": [
+            57421475717,
+            57422737254,
+            57428495512
+          ],
+          "samples_ts": [
+            8.91652,
+            8.91633,
+            8.91543
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:46:29Z",
+          "avg_ns": 42199267377,
+          "stddev_ns": 15388372,
+          "avg_ts": 3.033228,
+          "stddev_ts": 0.001106,
+          "samples_ns": [
+            42213964195,
+            42183274880,
+            42200563058
+          ],
+          "samples_ts": [
+            3.03217,
+            3.03438,
+            3.03313
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1046
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:00:58.506304+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:48:37Z\",\n    \"avg_ns\": 57453057470,\n    \"stddev_ns\": 2192650,\n    \"avg_ts\": 8.911623,\n    \"stddev_ts\": 0.000340,\n    \"samples_ns\": [ 57450526883, 57454253389, 57454392138 ],\n    \"samples_ts\": [ 8.91202, 8.91144, 8.91142 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T13:52:27Z\",\n    \"avg_ns\": 170181641229,\n    \"stddev_ns\": 50821606,\n    \"avg_ts\": 3.008550,\n    \"stddev_ts\": 0.000898,\n    \"samples_ns\": [ 170138424353, 170168868540, 170237630794 ],\n    \"samples_ts\": [ 3.00931, 3.00878, 3.00756 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:48:37Z",
+          "avg_ns": 57453057470,
+          "stddev_ns": 2192650,
+          "avg_ts": 8.911623,
+          "stddev_ts": 0.00034,
+          "samples_ns": [
+            57450526883,
+            57454253389,
+            57454392138
+          ],
+          "samples_ts": [
+            8.91202,
+            8.91144,
+            8.91142
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T13:52:27Z",
+          "avg_ns": 170181641229,
+          "stddev_ns": 50821606,
+          "avg_ts": 3.00855,
+          "stddev_ts": 0.000898,
+          "samples_ns": [
+            170138424353,
+            170168868540,
+            170237630794
+          ],
+          "samples_ts": [
+            3.00931,
+            3.00878,
+            3.00756
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1047
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:04:04.380726+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:00:59Z\",\n    \"avg_ns\": 14290114637,\n    \"stddev_ns\": 395960,\n    \"avg_ts\": 8.957241,\n    \"stddev_ts\": 0.000224,\n    \"samples_ns\": [ 14289701614, 14290304924, 14290337375 ],\n    \"samples_ts\": [ 8.9575, 8.95712, 8.9571 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:01:56Z\",\n    \"avg_ns\": 42384680447,\n    \"stddev_ns\": 16631510,\n    \"avg_ts\": 3.019959,\n    \"stddev_ts\": 0.001185,\n    \"samples_ns\": [ 42403782351, 42376820745, 42373438247 ],\n    \"samples_ts\": [ 3.0186, 3.02052, 3.02076 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:00:59Z",
+          "avg_ns": 14290114637,
+          "stddev_ns": 395960,
+          "avg_ts": 8.957241,
+          "stddev_ts": 0.000224,
+          "samples_ns": [
+            14289701614,
+            14290304924,
+            14290337375
+          ],
+          "samples_ts": [
+            8.9575,
+            8.95712,
+            8.9571
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:01:56Z",
+          "avg_ns": 42384680447,
+          "stddev_ns": 16631510,
+          "avg_ts": 3.019959,
+          "stddev_ts": 0.001185,
+          "samples_ns": [
+            42403782351,
+            42376820745,
+            42373438247
+          ],
+          "samples_ts": [
+            3.0186,
+            3.02052,
+            3.02076
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1048
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:13:33.441650+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:04:05Z\",\n    \"avg_ns\": 14286777339,\n    \"stddev_ns\": 706216,\n    \"avg_ts\": 8.959333,\n    \"stddev_ts\": 0.000430,\n    \"samples_ns\": [ 14286325590, 14287566340, 14286440089 ],\n    \"samples_ts\": [ 8.95962, 8.95884, 8.95954 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:05:02Z\",\n    \"avg_ns\": 170103197373,\n    \"stddev_ns\": 30004587,\n    \"avg_ts\": 3.009938,\n    \"stddev_ts\": 0.000531,\n    \"samples_ns\": [ 170118810385, 170122176136, 170068605598 ],\n    \"samples_ts\": [ 3.00966, 3.0096, 3.01055 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:04:05Z",
+          "avg_ns": 14286777339,
+          "stddev_ns": 706216,
+          "avg_ts": 8.959333,
+          "stddev_ts": 0.00043,
+          "samples_ns": [
+            14286325590,
+            14287566340,
+            14286440089
+          ],
+          "samples_ts": [
+            8.95962,
+            8.95884,
+            8.95954
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:05:02Z",
+          "avg_ns": 170103197373,
+          "stddev_ns": 30004587,
+          "avg_ts": 3.009938,
+          "stddev_ts": 0.000531,
+          "samples_ns": [
+            170118810385,
+            170122176136,
+            170068605598
+          ],
+          "samples_ts": [
+            3.00966,
+            3.0096,
+            3.01055
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1049
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:19:33.435193+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:13:34Z\",\n    \"avg_ns\": 57978510336,\n    \"stddev_ns\": 4555214,\n    \"avg_ts\": 8.830858,\n    \"stddev_ts\": 0.000692,\n    \"samples_ns\": [ 57973285352, 57981521172, 57980724486 ],\n    \"samples_ts\": [ 8.83165, 8.8304, 8.83052 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:17:26Z\",\n    \"avg_ns\": 42159732727,\n    \"stddev_ns\": 6236860,\n    \"avg_ts\": 3.036072,\n    \"stddev_ts\": 0.000449,\n    \"samples_ns\": [ 42164761961, 42152753972, 42161682248 ],\n    \"samples_ts\": [ 3.03571, 3.03658, 3.03593 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:13:34Z",
+          "avg_ns": 57978510336,
+          "stddev_ns": 4555214,
+          "avg_ts": 8.830858,
+          "stddev_ts": 0.000692,
+          "samples_ns": [
+            57973285352,
+            57981521172,
+            57980724486
+          ],
+          "samples_ts": [
+            8.83165,
+            8.8304,
+            8.83052
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:17:26Z",
+          "avg_ns": 42159732727,
+          "stddev_ns": 6236860,
+          "avg_ts": 3.036072,
+          "stddev_ts": 0.000449,
+          "samples_ns": [
+            42164761961,
+            42152753972,
+            42161682248
+          ],
+          "samples_ts": [
+            3.03571,
+            3.03658,
+            3.03593
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1050
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:31:59.917709+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:19:34Z\",\n    \"avg_ns\": 58066588845,\n    \"stddev_ns\": 3556841,\n    \"avg_ts\": 8.817463,\n    \"stddev_ts\": 0.000539,\n    \"samples_ns\": [ 58068615240, 58068660020, 58062491276 ],\n    \"samples_ts\": [ 8.81716, 8.81715, 8.81809 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:23:26Z\",\n    \"avg_ns\": 170878071138,\n    \"stddev_ns\": 33936870,\n    \"avg_ts\": 2.996289,\n    \"stddev_ts\": 0.000595,\n    \"samples_ns\": [ 170913353082, 170875198358, 170845661974 ],\n    \"samples_ts\": [ 2.99567, 2.99634, 2.99686 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:19:34Z",
+          "avg_ns": 58066588845,
+          "stddev_ns": 3556841,
+          "avg_ts": 8.817463,
+          "stddev_ts": 0.000539,
+          "samples_ns": [
+            58068615240,
+            58068660020,
+            58062491276
+          ],
+          "samples_ts": [
+            8.81716,
+            8.81715,
+            8.81809
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:23:26Z",
+          "avg_ns": 170878071138,
+          "stddev_ns": 33936870,
+          "avg_ts": 2.996289,
+          "stddev_ts": 0.000595,
+          "samples_ns": [
+            170913353082,
+            170875198358,
+            170845661974
+          ],
+          "samples_ts": [
+            2.99567,
+            2.99634,
+            2.99686
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1051
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:35:05.301058+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:32:01Z\",\n    \"avg_ns\": 14298360591,\n    \"stddev_ns\": 4822309,\n    \"avg_ts\": 8.952076,\n    \"stddev_ts\": 0.003018,\n    \"samples_ns\": [ 14297117985, 14294282712, 14303681077 ],\n    \"samples_ts\": [ 8.95285, 8.95463, 8.94875 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:32:58Z\",\n    \"avg_ns\": 42195419963,\n    \"stddev_ns\": 8902772,\n    \"avg_ts\": 3.033505,\n    \"stddev_ts\": 0.000640,\n    \"samples_ns\": [ 42185406851, 42198421438, 42202431601 ],\n    \"samples_ts\": [ 3.03422, 3.03329, 3.033 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:32:01Z",
+          "avg_ns": 14298360591,
+          "stddev_ns": 4822309,
+          "avg_ts": 8.952076,
+          "stddev_ts": 0.003018,
+          "samples_ns": [
+            14297117985,
+            14294282712,
+            14303681077
+          ],
+          "samples_ts": [
+            8.95285,
+            8.95463,
+            8.94875
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:32:58Z",
+          "avg_ns": 42195419963,
+          "stddev_ns": 8902772,
+          "avg_ts": 3.033505,
+          "stddev_ts": 0.00064,
+          "samples_ns": [
+            42185406851,
+            42198421438,
+            42202431601
+          ],
+          "samples_ts": [
+            3.03422,
+            3.03329,
+            3.033
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1052
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:44:35.298883+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:35:06Z\",\n    \"avg_ns\": 14285441583,\n    \"stddev_ns\": 337231,\n    \"avg_ts\": 8.960171,\n    \"stddev_ts\": 0.000183,\n    \"samples_ns\": [ 14285292387, 14285254534, 14285777830 ],\n    \"samples_ts\": [ 8.96026, 8.96029, 8.95996 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:36:03Z\",\n    \"avg_ns\": 170427333686,\n    \"stddev_ns\": 23656075,\n    \"avg_ts\": 3.004213,\n    \"stddev_ts\": 0.000417,\n    \"samples_ns\": [ 170403724544, 170427254432, 170451022084 ],\n    \"samples_ts\": [ 3.00463, 3.00421, 3.0038 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:35:06Z",
+          "avg_ns": 14285441583,
+          "stddev_ns": 337231,
+          "avg_ts": 8.960171,
+          "stddev_ts": 0.000183,
+          "samples_ns": [
+            14285292387,
+            14285254534,
+            14285777830
+          ],
+          "samples_ts": [
+            8.96026,
+            8.96029,
+            8.95996
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:36:03Z",
+          "avg_ns": 170427333686,
+          "stddev_ns": 23656075,
+          "avg_ts": 3.004213,
+          "stddev_ts": 0.000417,
+          "samples_ns": [
+            170403724544,
+            170427254432,
+            170451022084
+          ],
+          "samples_ts": [
+            3.00463,
+            3.00421,
+            3.0038
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1053
+    },
+    {
+      "timestamp_utc": "2025-12-10T14:50:42.354065+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:44:36Z\",\n    \"avg_ns\": 59346523041,\n    \"stddev_ns\": 489658285,\n    \"avg_ts\": 8.627685,\n    \"stddev_ts\": 0.070848,\n    \"samples_ns\": [ 59911887260, 59057704808, 59069977056 ],\n    \"samples_ts\": [ 8.54588, 8.66949, 8.66769 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:48:34Z\",\n    \"avg_ns\": 42447978977,\n    \"stddev_ns\": 16464309,\n    \"avg_ts\": 3.015456,\n    \"stddev_ts\": 0.001169,\n    \"samples_ns\": [ 42466946497, 42439573872, 42437416564 ],\n    \"samples_ts\": [ 3.01411, 3.01605, 3.01621 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:44:36Z",
+          "avg_ns": 59346523041,
+          "stddev_ns": 489658285,
+          "avg_ts": 8.627685,
+          "stddev_ts": 0.070848,
+          "samples_ns": [
+            59911887260,
+            59057704808,
+            59069977056
+          ],
+          "samples_ts": [
+            8.54588,
+            8.66949,
+            8.66769
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:48:34Z",
+          "avg_ns": 42447978977,
+          "stddev_ns": 16464309,
+          "avg_ts": 3.015456,
+          "stddev_ts": 0.001169,
+          "samples_ns": [
+            42466946497,
+            42439573872,
+            42437416564
+          ],
+          "samples_ts": [
+            3.01411,
+            3.01605,
+            3.01621
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1054
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:03:12.939392+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:50:43Z\",\n    \"avg_ns\": 59013447480,\n    \"stddev_ns\": 6945251,\n    \"avg_ts\": 8.675989,\n    \"stddev_ts\": 0.001020,\n    \"samples_ns\": [ 59019841815, 59014427976, 59006072651 ],\n    \"samples_ts\": [ 8.67505, 8.67584, 8.67707 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T14:54:39Z\",\n    \"avg_ns\": 170974332755,\n    \"stddev_ns\": 27239555,\n    \"avg_ts\": 2.994602,\n    \"stddev_ts\": 0.000477,\n    \"samples_ns\": [ 170998882886, 170945039472, 170979075909 ],\n    \"samples_ts\": [ 2.99417, 2.99511, 2.99452 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:50:43Z",
+          "avg_ns": 59013447480,
+          "stddev_ns": 6945251,
+          "avg_ts": 8.675989,
+          "stddev_ts": 0.00102,
+          "samples_ns": [
+            59019841815,
+            59014427976,
+            59006072651
+          ],
+          "samples_ts": [
+            8.67505,
+            8.67584,
+            8.67707
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T14:54:39Z",
+          "avg_ns": 170974332755,
+          "stddev_ns": 27239555,
+          "avg_ts": 2.994602,
+          "stddev_ts": 0.000477,
+          "samples_ns": [
+            170998882886,
+            170945039472,
+            170979075909
+          ],
+          "samples_ts": [
+            2.99417,
+            2.99511,
+            2.99452
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1055
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:06:19.296960+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:03:14Z\",\n    \"avg_ns\": 14401794779,\n    \"stddev_ns\": 182538467,\n    \"avg_ts\": 8.888726,\n    \"stddev_ts\": 0.111844,\n    \"samples_ns\": [ 14612572044, 14296430757, 14296381536 ],\n    \"samples_ts\": [ 8.75958, 8.95328, 8.95331 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:04:11Z\",\n    \"avg_ns\": 42322736665,\n    \"stddev_ns\": 5632847,\n    \"avg_ts\": 3.024379,\n    \"stddev_ts\": 0.000402,\n    \"samples_ns\": [ 42316282354, 42325330795, 42326596848 ],\n    \"samples_ts\": [ 3.02484, 3.02419, 3.0241 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:03:14Z",
+          "avg_ns": 14401794779,
+          "stddev_ns": 182538467,
+          "avg_ts": 8.888726,
+          "stddev_ts": 0.111844,
+          "samples_ns": [
+            14612572044,
+            14296430757,
+            14296381536
+          ],
+          "samples_ts": [
+            8.75958,
+            8.95328,
+            8.95331
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:04:11Z",
+          "avg_ns": 42322736665,
+          "stddev_ns": 5632847,
+          "avg_ts": 3.024379,
+          "stddev_ts": 0.000402,
+          "samples_ns": [
+            42316282354,
+            42325330795,
+            42326596848
+          ],
+          "samples_ts": [
+            3.02484,
+            3.02419,
+            3.0241
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1056
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:15:50.111361+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:06:20Z\",\n    \"avg_ns\": 14298499482,\n    \"stddev_ns\": 1735507,\n    \"avg_ts\": 8.951988,\n    \"stddev_ts\": 0.001084,\n    \"samples_ns\": [ 14297403150, 14300495484, 14297599813 ],\n    \"samples_ts\": [ 8.95267, 8.95074, 8.95255 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:07:17Z\",\n    \"avg_ns\": 170680121804,\n    \"stddev_ns\": 42224928,\n    \"avg_ts\": 2.999764,\n    \"stddev_ts\": 0.000742,\n    \"samples_ns\": [ 170644664621, 170726830821, 170668869971 ],\n    \"samples_ts\": [ 3.00039, 2.99894, 2.99996 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:06:20Z",
+          "avg_ns": 14298499482,
+          "stddev_ns": 1735507,
+          "avg_ts": 8.951988,
+          "stddev_ts": 0.001084,
+          "samples_ns": [
+            14297403150,
+            14300495484,
+            14297599813
+          ],
+          "samples_ts": [
+            8.95267,
+            8.95074,
+            8.95255
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:07:17Z",
+          "avg_ns": 170680121804,
+          "stddev_ns": 42224928,
+          "avg_ts": 2.999764,
+          "stddev_ts": 0.000742,
+          "samples_ns": [
+            170644664621,
+            170726830821,
+            170668869971
+          ],
+          "samples_ts": [
+            3.00039,
+            2.99894,
+            2.99996
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1057
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:21:48.865654+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:15:51Z\",\n    \"avg_ns\": 57427236807,\n    \"stddev_ns\": 1237369,\n    \"avg_ts\": 8.915630,\n    \"stddev_ts\": 0.000188,\n    \"samples_ns\": [ 57426099964, 57428515403, 57427095055 ],\n    \"samples_ts\": [ 8.91581, 8.91543, 8.91565 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:19:40Z\",\n    \"avg_ns\": 42487768542,\n    \"stddev_ns\": 2828391,\n    \"avg_ts\": 3.012632,\n    \"stddev_ts\": 0.000199,\n    \"samples_ns\": [ 42490415196, 42488076547, 42484813885 ],\n    \"samples_ts\": [ 3.01244, 3.01261, 3.01284 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:15:51Z",
+          "avg_ns": 57427236807,
+          "stddev_ns": 1237369,
+          "avg_ts": 8.91563,
+          "stddev_ts": 0.000188,
+          "samples_ns": [
+            57426099964,
+            57428515403,
+            57427095055
+          ],
+          "samples_ts": [
+            8.91581,
+            8.91543,
+            8.91565
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:19:40Z",
+          "avg_ns": 42487768542,
+          "stddev_ns": 2828391,
+          "avg_ts": 3.012632,
+          "stddev_ts": 0.000199,
+          "samples_ns": [
+            42490415196,
+            42488076547,
+            42484813885
+          ],
+          "samples_ts": [
+            3.01244,
+            3.01261,
+            3.01284
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1058
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:34:12.046003+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:21:49Z\",\n    \"avg_ns\": 57430616816,\n    \"stddev_ns\": 1139641,\n    \"avg_ts\": 8.915105,\n    \"stddev_ts\": 0.000177,\n    \"samples_ns\": [ 57430740036, 57431689840, 57429420572 ],\n    \"samples_ts\": [ 8.91509, 8.91494, 8.91529 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:25:39Z\",\n    \"avg_ns\": 170619465652,\n    \"stddev_ns\": 81141085,\n    \"avg_ts\": 3.000830,\n    \"stddev_ts\": 0.001427,\n    \"samples_ns\": [ 170631264341, 170694060397, 170533072219 ],\n    \"samples_ts\": [ 3.00062, 2.99952, 3.00235 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:21:49Z",
+          "avg_ns": 57430616816,
+          "stddev_ns": 1139641,
+          "avg_ts": 8.915105,
+          "stddev_ts": 0.000177,
+          "samples_ns": [
+            57430740036,
+            57431689840,
+            57429420572
+          ],
+          "samples_ts": [
+            8.91509,
+            8.91494,
+            8.91529
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:25:39Z",
+          "avg_ns": 170619465652,
+          "stddev_ns": 81141085,
+          "avg_ts": 3.00083,
+          "stddev_ts": 0.001427,
+          "samples_ns": [
+            170631264341,
+            170694060397,
+            170533072219
+          ],
+          "samples_ts": [
+            3.00062,
+            2.99952,
+            3.00235
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1059
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:37:18.145237+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:34:13Z\",\n    \"avg_ns\": 14289724170,\n    \"stddev_ns\": 963131,\n    \"avg_ts\": 8.957486,\n    \"stddev_ts\": 0.000594,\n    \"samples_ns\": [ 14290816448, 14289243145, 14289112919 ],\n    \"samples_ts\": [ 8.9568, 8.95779, 8.95787 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:35:10Z\",\n    \"avg_ns\": 42457986359,\n    \"stddev_ns\": 30121234,\n    \"avg_ts\": 3.014746,\n    \"stddev_ts\": 0.002140,\n    \"samples_ns\": [ 42473224809, 42477443526, 42423290742 ],\n    \"samples_ts\": [ 3.01366, 3.01336, 3.01721 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:34:13Z",
+          "avg_ns": 14289724170,
+          "stddev_ns": 963131,
+          "avg_ts": 8.957486,
+          "stddev_ts": 0.000594,
+          "samples_ns": [
+            14290816448,
+            14289243145,
+            14289112919
+          ],
+          "samples_ts": [
+            8.9568,
+            8.95779,
+            8.95787
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:35:10Z",
+          "avg_ns": 42457986359,
+          "stddev_ns": 30121234,
+          "avg_ts": 3.014746,
+          "stddev_ts": 0.00214,
+          "samples_ns": [
+            42473224809,
+            42477443526,
+            42423290742
+          ],
+          "samples_ts": [
+            3.01366,
+            3.01336,
+            3.01721
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1060
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:46:48.907595+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:37:19Z\",\n    \"avg_ns\": 14292681526,\n    \"stddev_ns\": 746372,\n    \"avg_ts\": 8.955632,\n    \"stddev_ts\": 0.000462,\n    \"samples_ns\": [ 14293075829, 14291831556, 14293137194 ],\n    \"samples_ts\": [ 8.95539, 8.95616, 8.95535 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:38:16Z\",\n    \"avg_ns\": 170673149242,\n    \"stddev_ns\": 39477355,\n    \"avg_ts\": 2.999886,\n    \"stddev_ts\": 0.000694,\n    \"samples_ns\": [ 170715963019, 170638195164, 170665289544 ],\n    \"samples_ts\": [ 2.99913, 3.0005, 3.00002 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:37:19Z",
+          "avg_ns": 14292681526,
+          "stddev_ns": 746372,
+          "avg_ts": 8.955632,
+          "stddev_ts": 0.000462,
+          "samples_ns": [
+            14293075829,
+            14291831556,
+            14293137194
+          ],
+          "samples_ts": [
+            8.95539,
+            8.95616,
+            8.95535
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:38:16Z",
+          "avg_ns": 170673149242,
+          "stddev_ns": 39477355,
+          "avg_ts": 2.999886,
+          "stddev_ts": 0.000694,
+          "samples_ns": [
+            170715963019,
+            170638195164,
+            170665289544
+          ],
+          "samples_ts": [
+            2.99913,
+            3.0005,
+            3.00002
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1061
+    },
+    {
+      "timestamp_utc": "2025-12-10T15:52:49.869727+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:46:50Z\",\n    \"avg_ns\": 58022684191,\n    \"stddev_ns\": 537624,\n    \"avg_ts\": 8.824135,\n    \"stddev_ts\": 0.000082,\n    \"samples_ns\": [ 58022798661, 58023155362, 58022098550 ],\n    \"samples_ts\": [ 8.82412, 8.82406, 8.82422 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:50:42Z\",\n    \"avg_ns\": 42434788665,\n    \"stddev_ns\": 11019045,\n    \"avg_ts\": 3.016393,\n    \"stddev_ts\": 0.000783,\n    \"samples_ns\": [ 42437753454, 42444018041, 42422594502 ],\n    \"samples_ts\": [ 3.01618, 3.01574, 3.01726 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:46:50Z",
+          "avg_ns": 58022684191,
+          "stddev_ns": 537624,
+          "avg_ts": 8.824135,
+          "stddev_ts": 8.2e-05,
+          "samples_ns": [
+            58022798661,
+            58023155362,
+            58022098550
+          ],
+          "samples_ts": [
+            8.82412,
+            8.82406,
+            8.82422
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:50:42Z",
+          "avg_ns": 42434788665,
+          "stddev_ns": 11019045,
+          "avg_ts": 3.016393,
+          "stddev_ts": 0.000783,
+          "samples_ns": [
+            42437753454,
+            42444018041,
+            42422594502
+          ],
+          "samples_ts": [
+            3.01618,
+            3.01574,
+            3.01726
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1062
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:05:16.020967+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:52:50Z\",\n    \"avg_ns\": 57997642374,\n    \"stddev_ns\": 2282930,\n    \"avg_ts\": 8.827945,\n    \"stddev_ts\": 0.000346,\n    \"samples_ns\": [ 57999212480, 57995039397, 57998675246 ],\n    \"samples_ts\": [ 8.82771, 8.82834, 8.82779 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T15:56:43Z\",\n    \"avg_ns\": 170828361929,\n    \"stddev_ns\": 100059644,\n    \"avg_ts\": 2.997161,\n    \"stddev_ts\": 0.001756,\n    \"samples_ns\": [ 170725930532, 170833289203, 170925866053 ],\n    \"samples_ts\": [ 2.99896, 2.99707, 2.99545 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:52:50Z",
+          "avg_ns": 57997642374,
+          "stddev_ns": 2282930,
+          "avg_ts": 8.827945,
+          "stddev_ts": 0.000346,
+          "samples_ns": [
+            57999212480,
+            57995039397,
+            57998675246
+          ],
+          "samples_ts": [
+            8.82771,
+            8.82834,
+            8.82779
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T15:56:43Z",
+          "avg_ns": 170828361929,
+          "stddev_ns": 100059644,
+          "avg_ts": 2.997161,
+          "stddev_ts": 0.001756,
+          "samples_ns": [
+            170725930532,
+            170833289203,
+            170925866053
+          ],
+          "samples_ts": [
+            2.99896,
+            2.99707,
+            2.99545
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1063
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:08:21.685997+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:05:17Z\",\n    \"avg_ns\": 14295321506,\n    \"stddev_ns\": 1255132,\n    \"avg_ts\": 8.953978,\n    \"stddev_ts\": 0.000779,\n    \"samples_ns\": [ 14294968948, 14294292156, 14296703416 ],\n    \"samples_ts\": [ 8.9542, 8.95462, 8.95311 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:06:14Z\",\n    \"avg_ns\": 42303048163,\n    \"stddev_ns\": 16477744,\n    \"avg_ts\": 3.025787,\n    \"stddev_ts\": 0.001179,\n    \"samples_ns\": [ 42312854975, 42312262169, 42284027347 ],\n    \"samples_ts\": [ 3.02509, 3.02513, 3.02715 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:05:17Z",
+          "avg_ns": 14295321506,
+          "stddev_ns": 1255132,
+          "avg_ts": 8.953978,
+          "stddev_ts": 0.000779,
+          "samples_ns": [
+            14294968948,
+            14294292156,
+            14296703416
+          ],
+          "samples_ts": [
+            8.9542,
+            8.95462,
+            8.95311
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:06:14Z",
+          "avg_ns": 42303048163,
+          "stddev_ns": 16477744,
+          "avg_ts": 3.025787,
+          "stddev_ts": 0.001179,
+          "samples_ns": [
+            42312854975,
+            42312262169,
+            42284027347
+          ],
+          "samples_ts": [
+            3.02509,
+            3.02513,
+            3.02715
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1064
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:17:53.433313+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:08:22Z\",\n    \"avg_ns\": 14448547835,\n    \"stddev_ns\": 164894183,\n    \"avg_ts\": 8.859789,\n    \"stddev_ts\": 0.100893,\n    \"samples_ns\": [ 14623281253, 14426688773, 14295673480 ],\n    \"samples_ts\": [ 8.75317, 8.87244, 8.95376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:09:20Z\",\n    \"avg_ns\": 170734659215,\n    \"stddev_ns\": 74835148,\n    \"avg_ts\": 2.998806,\n    \"stddev_ts\": 0.001315,\n    \"samples_ns\": [ 170649321893, 170789080070, 170765575684 ],\n    \"samples_ts\": [ 3.0003, 2.99785, 2.99826 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:08:22Z",
+          "avg_ns": 14448547835,
+          "stddev_ns": 164894183,
+          "avg_ts": 8.859789,
+          "stddev_ts": 0.100893,
+          "samples_ns": [
+            14623281253,
+            14426688773,
+            14295673480
+          ],
+          "samples_ts": [
+            8.75317,
+            8.87244,
+            8.95376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:09:20Z",
+          "avg_ns": 170734659215,
+          "stddev_ns": 74835148,
+          "avg_ts": 2.998806,
+          "stddev_ts": 0.001315,
+          "samples_ns": [
+            170649321893,
+            170789080070,
+            170765575684
+          ],
+          "samples_ts": [
+            3.0003,
+            2.99785,
+            2.99826
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1065
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:23:58.111069+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:17:54Z\",\n    \"avg_ns\": 59007286229,\n    \"stddev_ns\": 2112905,\n    \"avg_ts\": 8.676895,\n    \"stddev_ts\": 0.000307,\n    \"samples_ns\": [ 59005576493, 59009608734, 59006673462 ],\n    \"samples_ts\": [ 8.67715, 8.67655, 8.67698 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:21:50Z\",\n    \"avg_ns\": 42346883281,\n    \"stddev_ns\": 6561150,\n    \"avg_ts\": 3.022655,\n    \"stddev_ts\": 0.000468,\n    \"samples_ns\": [ 42339314575, 42350669948, 42350665322 ],\n    \"samples_ts\": [ 3.02319, 3.02238, 3.02238 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:17:54Z",
+          "avg_ns": 59007286229,
+          "stddev_ns": 2112905,
+          "avg_ts": 8.676895,
+          "stddev_ts": 0.000307,
+          "samples_ns": [
+            59005576493,
+            59009608734,
+            59006673462
+          ],
+          "samples_ts": [
+            8.67715,
+            8.67655,
+            8.67698
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:21:50Z",
+          "avg_ns": 42346883281,
+          "stddev_ns": 6561150,
+          "avg_ts": 3.022655,
+          "stddev_ts": 0.000468,
+          "samples_ns": [
+            42339314575,
+            42350669948,
+            42350665322
+          ],
+          "samples_ts": [
+            3.02319,
+            3.02238,
+            3.02238
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1066
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:36:28.951062+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:23:59Z\",\n    \"avg_ns\": 59012594186,\n    \"stddev_ns\": 4594830,\n    \"avg_ts\": 8.676114,\n    \"stddev_ts\": 0.000675,\n    \"samples_ns\": [ 59017689972, 59011302551, 59008790036 ],\n    \"samples_ts\": [ 8.67536, 8.6763, 8.67667 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:27:55Z\",\n    \"avg_ns\": 171064778498,\n    \"stddev_ns\": 48350843,\n    \"avg_ts\": 2.993018,\n    \"stddev_ts\": 0.000846,\n    \"samples_ns\": [ 171115758079, 171019579705, 171058997711 ],\n    \"samples_ts\": [ 2.99213, 2.99381, 2.99312 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:23:59Z",
+          "avg_ns": 59012594186,
+          "stddev_ns": 4594830,
+          "avg_ts": 8.676114,
+          "stddev_ts": 0.000675,
+          "samples_ns": [
+            59017689972,
+            59011302551,
+            59008790036
+          ],
+          "samples_ts": [
+            8.67536,
+            8.6763,
+            8.67667
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:27:55Z",
+          "avg_ns": 171064778498,
+          "stddev_ns": 48350843,
+          "avg_ts": 2.993018,
+          "stddev_ts": 0.000846,
+          "samples_ns": [
+            171115758079,
+            171019579705,
+            171058997711
+          ],
+          "samples_ts": [
+            2.99213,
+            2.99381,
+            2.99312
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1067
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:39:35.646240+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:36:30Z\",\n    \"avg_ns\": 14341286401,\n    \"stddev_ns\": 91814848,\n    \"avg_ts\": 8.925523,\n    \"stddev_ts\": 0.056932,\n    \"samples_ns\": [ 14447295052, 14287020810, 14289543341 ],\n    \"samples_ts\": [ 8.85979, 8.95918, 8.9576 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:37:27Z\",\n    \"avg_ns\": 42485054827,\n    \"stddev_ns\": 16486098,\n    \"avg_ts\": 3.012824,\n    \"stddev_ts\": 0.001169,\n    \"samples_ns\": [ 42499230583, 42466963431, 42488970467 ],\n    \"samples_ts\": [ 3.01182, 3.01411, 3.01255 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:36:30Z",
+          "avg_ns": 14341286401,
+          "stddev_ns": 91814848,
+          "avg_ts": 8.925523,
+          "stddev_ts": 0.056932,
+          "samples_ns": [
+            14447295052,
+            14287020810,
+            14289543341
+          ],
+          "samples_ts": [
+            8.85979,
+            8.95918,
+            8.9576
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:37:27Z",
+          "avg_ns": 42485054827,
+          "stddev_ns": 16486098,
+          "avg_ts": 3.012824,
+          "stddev_ts": 0.001169,
+          "samples_ns": [
+            42499230583,
+            42466963431,
+            42488970467
+          ],
+          "samples_ts": [
+            3.01182,
+            3.01411,
+            3.01255
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1068
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:49:06.895454+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:39:36Z\",\n    \"avg_ns\": 14312275262,\n    \"stddev_ns\": 49476531,\n    \"avg_ts\": 8.943443,\n    \"stddev_ts\": 0.030855,\n    \"samples_ns\": [ 14369404276, 14283386048, 14284035464 ],\n    \"samples_ts\": [ 8.90782, 8.96146, 8.96105 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:40:34Z\",\n    \"avg_ns\": 170783966331,\n    \"stddev_ns\": 26235330,\n    \"avg_ts\": 2.997940,\n    \"stddev_ts\": 0.000461,\n    \"samples_ns\": [ 170760530093, 170779060683, 170812308217 ],\n    \"samples_ts\": [ 2.99835, 2.99803, 2.99744 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:39:36Z",
+          "avg_ns": 14312275262,
+          "stddev_ns": 49476531,
+          "avg_ts": 8.943443,
+          "stddev_ts": 0.030855,
+          "samples_ns": [
+            14369404276,
+            14283386048,
+            14284035464
+          ],
+          "samples_ts": [
+            8.90782,
+            8.96146,
+            8.96105
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:40:34Z",
+          "avg_ns": 170783966331,
+          "stddev_ns": 26235330,
+          "avg_ts": 2.99794,
+          "stddev_ts": 0.000461,
+          "samples_ns": [
+            170760530093,
+            170779060683,
+            170812308217
+          ],
+          "samples_ts": [
+            2.99835,
+            2.99803,
+            2.99744
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1069
+    },
+    {
+      "timestamp_utc": "2025-12-10T16:55:05.703313+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:49:08Z\",\n    \"avg_ns\": 57507514110,\n    \"stddev_ns\": 21079647,\n    \"avg_ts\": 8.903185,\n    \"stddev_ts\": 0.003263,\n    \"samples_ns\": [ 57489469284, 57502391482, 57530681565 ],\n    \"samples_ts\": [ 8.90598, 8.90398, 8.8996 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:52:58Z\",\n    \"avg_ns\": 42400591319,\n    \"stddev_ns\": 30123417,\n    \"avg_ts\": 3.018827,\n    \"stddev_ts\": 0.002146,\n    \"samples_ns\": [ 42366119559, 42421851327, 42413803071 ],\n    \"samples_ts\": [ 3.02128, 3.01731, 3.01789 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:49:08Z",
+          "avg_ns": 57507514110,
+          "stddev_ns": 21079647,
+          "avg_ts": 8.903185,
+          "stddev_ts": 0.003263,
+          "samples_ns": [
+            57489469284,
+            57502391482,
+            57530681565
+          ],
+          "samples_ts": [
+            8.90598,
+            8.90398,
+            8.8996
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:52:58Z",
+          "avg_ns": 42400591319,
+          "stddev_ns": 30123417,
+          "avg_ts": 3.018827,
+          "stddev_ts": 0.002146,
+          "samples_ns": [
+            42366119559,
+            42421851327,
+            42413803071
+          ],
+          "samples_ts": [
+            3.02128,
+            3.01731,
+            3.01789
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1070
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:07:31.568741+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:55:06Z\",\n    \"avg_ns\": 57512770002,\n    \"stddev_ns\": 3349860,\n    \"avg_ts\": 8.902371,\n    \"stddev_ts\": 0.000516,\n    \"samples_ns\": [ 57516423208, 57511990830, 57509895970 ],\n    \"samples_ts\": [ 8.90181, 8.90249, 8.90282 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T16:58:56Z\",\n    \"avg_ns\": 171408980253,\n    \"stddev_ns\": 32452774,\n    \"avg_ts\": 2.987008,\n    \"stddev_ts\": 0.000566,\n    \"samples_ns\": [ 171405541103, 171443015641, 171378384015 ],\n    \"samples_ts\": [ 2.98707, 2.98642, 2.98754 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:55:06Z",
+          "avg_ns": 57512770002,
+          "stddev_ns": 3349860,
+          "avg_ts": 8.902371,
+          "stddev_ts": 0.000516,
+          "samples_ns": [
+            57516423208,
+            57511990830,
+            57509895970
+          ],
+          "samples_ts": [
+            8.90181,
+            8.90249,
+            8.90282
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T16:58:56Z",
+          "avg_ns": 171408980253,
+          "stddev_ns": 32452774,
+          "avg_ts": 2.987008,
+          "stddev_ts": 0.000566,
+          "samples_ns": [
+            171405541103,
+            171443015641,
+            171378384015
+          ],
+          "samples_ts": [
+            2.98707,
+            2.98642,
+            2.98754
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1071
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:10:37.358425+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:07:32Z\",\n    \"avg_ns\": 14291536325,\n    \"stddev_ns\": 758836,\n    \"avg_ts\": 8.956350,\n    \"stddev_ts\": 0.000470,\n    \"samples_ns\": [ 14291377673, 14290878995, 14292352308 ],\n    \"samples_ts\": [ 8.95645, 8.95676, 8.95584 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:08:29Z\",\n    \"avg_ns\": 42345534257,\n    \"stddev_ns\": 19048152,\n    \"avg_ts\": 3.022751,\n    \"stddev_ts\": 0.001360,\n    \"samples_ns\": [ 42323790874, 42353548260, 42359263639 ],\n    \"samples_ts\": [ 3.0243, 3.02218, 3.02177 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:07:32Z",
+          "avg_ns": 14291536325,
+          "stddev_ns": 758836,
+          "avg_ts": 8.95635,
+          "stddev_ts": 0.00047,
+          "samples_ns": [
+            14291377673,
+            14290878995,
+            14292352308
+          ],
+          "samples_ts": [
+            8.95645,
+            8.95676,
+            8.95584
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:08:29Z",
+          "avg_ns": 42345534257,
+          "stddev_ns": 19048152,
+          "avg_ts": 3.022751,
+          "stddev_ts": 0.00136,
+          "samples_ns": [
+            42323790874,
+            42353548260,
+            42359263639
+          ],
+          "samples_ts": [
+            3.0243,
+            3.02218,
+            3.02177
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1072
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:20:10.548126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:10:38Z\",\n    \"avg_ns\": 14303505133,\n    \"stddev_ns\": 1648755,\n    \"avg_ts\": 8.948856,\n    \"stddev_ts\": 0.001026,\n    \"samples_ns\": [ 14302563055, 14305398901, 14302553445 ],\n    \"samples_ts\": [ 8.94944, 8.94767, 8.94945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:11:35Z\",\n    \"avg_ns\": 171467178281,\n    \"stddev_ns\": 37233916,\n    \"avg_ts\": 2.985994,\n    \"stddev_ts\": 0.000648,\n    \"samples_ns\": [ 171442271916, 171449281517, 171509981410 ],\n    \"samples_ts\": [ 2.98643, 2.98631, 2.98525 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:10:38Z",
+          "avg_ns": 14303505133,
+          "stddev_ns": 1648755,
+          "avg_ts": 8.948856,
+          "stddev_ts": 0.001026,
+          "samples_ns": [
+            14302563055,
+            14305398901,
+            14302553445
+          ],
+          "samples_ts": [
+            8.94944,
+            8.94767,
+            8.94945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:11:35Z",
+          "avg_ns": 171467178281,
+          "stddev_ns": 37233916,
+          "avg_ts": 2.985994,
+          "stddev_ts": 0.000648,
+          "samples_ns": [
+            171442271916,
+            171449281517,
+            171509981410
+          ],
+          "samples_ts": [
+            2.98643,
+            2.98631,
+            2.98525
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1073
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:26:11.132179+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:20:11Z\",\n    \"avg_ns\": 57957789337,\n    \"stddev_ns\": 1704110,\n    \"avg_ts\": 8.834015,\n    \"stddev_ts\": 0.000257,\n    \"samples_ns\": [ 57956348424, 57959645074, 57957374514 ],\n    \"samples_ts\": [ 8.83423, 8.83373, 8.83408 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:24:03Z\",\n    \"avg_ns\": 42385695435,\n    \"stddev_ns\": 23301687,\n    \"avg_ts\": 3.019887,\n    \"stddev_ts\": 0.001660,\n    \"samples_ns\": [ 42362428521, 42385626042, 42409031742 ],\n    \"samples_ts\": [ 3.02155, 3.01989, 3.01823 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:20:11Z",
+          "avg_ns": 57957789337,
+          "stddev_ns": 1704110,
+          "avg_ts": 8.834015,
+          "stddev_ts": 0.000257,
+          "samples_ns": [
+            57956348424,
+            57959645074,
+            57957374514
+          ],
+          "samples_ts": [
+            8.83423,
+            8.83373,
+            8.83408
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:24:03Z",
+          "avg_ns": 42385695435,
+          "stddev_ns": 23301687,
+          "avg_ts": 3.019887,
+          "stddev_ts": 0.00166,
+          "samples_ns": [
+            42362428521,
+            42385626042,
+            42409031742
+          ],
+          "samples_ts": [
+            3.02155,
+            3.01989,
+            3.01823
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1074
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:38:35.806960+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:26:12Z\",\n    \"avg_ns\": 57975464376,\n    \"stddev_ns\": 1855106,\n    \"avg_ts\": 8.831322,\n    \"stddev_ts\": 0.000280,\n    \"samples_ns\": [ 57977426479, 57975187639, 57973779011 ],\n    \"samples_ts\": [ 8.83102, 8.83136, 8.83158 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:30:04Z\",\n    \"avg_ns\": 170384357943,\n    \"stddev_ns\": 24617510,\n    \"avg_ts\": 3.004971,\n    \"stddev_ts\": 0.000434,\n    \"samples_ns\": [ 170356312148, 170394390044, 170402371638 ],\n    \"samples_ts\": [ 3.00547, 3.00479, 3.00465 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:26:12Z",
+          "avg_ns": 57975464376,
+          "stddev_ns": 1855106,
+          "avg_ts": 8.831322,
+          "stddev_ts": 0.00028,
+          "samples_ns": [
+            57977426479,
+            57975187639,
+            57973779011
+          ],
+          "samples_ts": [
+            8.83102,
+            8.83136,
+            8.83158
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:30:04Z",
+          "avg_ns": 170384357943,
+          "stddev_ns": 24617510,
+          "avg_ts": 3.004971,
+          "stddev_ts": 0.000434,
+          "samples_ns": [
+            170356312148,
+            170394390044,
+            170402371638
+          ],
+          "samples_ts": [
+            3.00547,
+            3.00479,
+            3.00465
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1075
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:41:41.950283+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:38:36Z\",\n    \"avg_ns\": 14288014928,\n    \"stddev_ns\": 824749,\n    \"avg_ts\": 8.958557,\n    \"stddev_ts\": 0.000512,\n    \"samples_ns\": [ 14288481717, 14287072658, 14288490410 ],\n    \"samples_ts\": [ 8.95826, 8.95915, 8.95826 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:39:34Z\",\n    \"avg_ns\": 42475350109,\n    \"stddev_ns\": 12176215,\n    \"avg_ts\": 3.013513,\n    \"stddev_ts\": 0.000864,\n    \"samples_ns\": [ 42478001737, 42485982007, 42462066583 ],\n    \"samples_ts\": [ 3.01332, 3.01276, 3.01446 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:38:36Z",
+          "avg_ns": 14288014928,
+          "stddev_ns": 824749,
+          "avg_ts": 8.958557,
+          "stddev_ts": 0.000512,
+          "samples_ns": [
+            14288481717,
+            14287072658,
+            14288490410
+          ],
+          "samples_ts": [
+            8.95826,
+            8.95915,
+            8.95826
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:39:34Z",
+          "avg_ns": 42475350109,
+          "stddev_ns": 12176215,
+          "avg_ts": 3.013513,
+          "stddev_ts": 0.000864,
+          "samples_ns": [
+            42478001737,
+            42485982007,
+            42462066583
+          ],
+          "samples_ts": [
+            3.01332,
+            3.01276,
+            3.01446
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1076
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:51:13.095393+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:41:43Z\",\n    \"avg_ns\": 14287827809,\n    \"stddev_ns\": 719781,\n    \"avg_ts\": 8.958675,\n    \"stddev_ts\": 0.000445,\n    \"samples_ns\": [ 14287026590, 14288078974, 14288377864 ],\n    \"samples_ts\": [ 8.95918, 8.95852, 8.95833 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:42:40Z\",\n    \"avg_ns\": 170805372060,\n    \"stddev_ns\": 35669019,\n    \"avg_ts\": 2.997564,\n    \"stddev_ts\": 0.000626,\n    \"samples_ns\": [ 170793006404, 170845573343, 170777536435 ],\n    \"samples_ts\": [ 2.99778, 2.99686, 2.99805 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:41:43Z",
+          "avg_ns": 14287827809,
+          "stddev_ns": 719781,
+          "avg_ts": 8.958675,
+          "stddev_ts": 0.000445,
+          "samples_ns": [
+            14287026590,
+            14288078974,
+            14288377864
+          ],
+          "samples_ts": [
+            8.95918,
+            8.95852,
+            8.95833
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:42:40Z",
+          "avg_ns": 170805372060,
+          "stddev_ns": 35669019,
+          "avg_ts": 2.997564,
+          "stddev_ts": 0.000626,
+          "samples_ns": [
+            170793006404,
+            170845573343,
+            170777536435
+          ],
+          "samples_ts": [
+            2.99778,
+            2.99686,
+            2.99805
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1077
+    },
+    {
+      "timestamp_utc": "2025-12-10T17:57:18.586655+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:51:14Z\",\n    \"avg_ns\": 59069575810,\n    \"stddev_ns\": 3436129,\n    \"avg_ts\": 8.667745,\n    \"stddev_ts\": 0.000503,\n    \"samples_ns\": [ 59065681090, 59070913844, 59072132497 ],\n    \"samples_ts\": [ 8.66832, 8.66755, 8.66737 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:55:10Z\",\n    \"avg_ns\": 42534502177,\n    \"stddev_ns\": 6873096,\n    \"avg_ts\": 3.009322,\n    \"stddev_ts\": 0.000486,\n    \"samples_ns\": [ 42537507467, 42526644963, 42539354103 ],\n    \"samples_ts\": [ 3.00911, 3.00988, 3.00898 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:51:14Z",
+          "avg_ns": 59069575810,
+          "stddev_ns": 3436129,
+          "avg_ts": 8.667745,
+          "stddev_ts": 0.000503,
+          "samples_ns": [
+            59065681090,
+            59070913844,
+            59072132497
+          ],
+          "samples_ts": [
+            8.66832,
+            8.66755,
+            8.66737
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:55:10Z",
+          "avg_ns": 42534502177,
+          "stddev_ns": 6873096,
+          "avg_ts": 3.009322,
+          "stddev_ts": 0.000486,
+          "samples_ns": [
+            42537507467,
+            42526644963,
+            42539354103
+          ],
+          "samples_ts": [
+            3.00911,
+            3.00988,
+            3.00898
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1078
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:09:47.882721+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T17:57:19Z\",\n    \"avg_ns\": 59078358238,\n    \"stddev_ns\": 3178633,\n    \"avg_ts\": 8.666456,\n    \"stddev_ts\": 0.000464,\n    \"samples_ns\": [ 59078535377, 59081425936, 59075113403 ],\n    \"samples_ts\": [ 8.66643, 8.66601, 8.66693 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:01:16Z\",\n    \"avg_ns\": 170465250789,\n    \"stddev_ns\": 41004240,\n    \"avg_ts\": 3.003545,\n    \"stddev_ts\": 0.000722,\n    \"samples_ns\": [ 170501565105, 170473398759, 170420788505 ],\n    \"samples_ts\": [ 3.0029, 3.0034, 3.00433 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T17:57:19Z",
+          "avg_ns": 59078358238,
+          "stddev_ns": 3178633,
+          "avg_ts": 8.666456,
+          "stddev_ts": 0.000464,
+          "samples_ns": [
+            59078535377,
+            59081425936,
+            59075113403
+          ],
+          "samples_ts": [
+            8.66643,
+            8.66601,
+            8.66693
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:01:16Z",
+          "avg_ns": 170465250789,
+          "stddev_ns": 41004240,
+          "avg_ts": 3.003545,
+          "stddev_ts": 0.000722,
+          "samples_ns": [
+            170501565105,
+            170473398759,
+            170420788505
+          ],
+          "samples_ts": [
+            3.0029,
+            3.0034,
+            3.00433
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1079
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:12:43.372713+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:09:49Z\",\n    \"avg_ns\": 9886821843,\n    \"stddev_ns\": 2810398,\n    \"avg_ts\": 12.946527,\n    \"stddev_ts\": 0.003676,\n    \"samples_ns\": [ 9883916134, 9887031234, 9889518163 ],\n    \"samples_ts\": [ 12.9503, 12.9463, 12.943 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:10:28Z\",\n    \"avg_ns\": 44776774807,\n    \"stddev_ns\": 7874325,\n    \"avg_ts\": 2.858625,\n    \"stddev_ts\": 0.000502,\n    \"samples_ns\": [ 44774298559, 44770442160, 44785583704 ],\n    \"samples_ts\": [ 2.85878, 2.85903, 2.85806 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:09:49Z",
+          "avg_ns": 9886821843,
+          "stddev_ns": 2810398,
+          "avg_ts": 12.946527,
+          "stddev_ts": 0.003676,
+          "samples_ns": [
+            9883916134,
+            9887031234,
+            9889518163
+          ],
+          "samples_ts": [
+            12.9503,
+            12.9463,
+            12.943
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:10:28Z",
+          "avg_ns": 44776774807,
+          "stddev_ns": 7874325,
+          "avg_ts": 2.858625,
+          "stddev_ts": 0.000502,
+          "samples_ns": [
+            44774298559,
+            44770442160,
+            44785583704
+          ],
+          "samples_ts": [
+            2.85878,
+            2.85903,
+            2.85806
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1080
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:22:26.252560+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:12:44Z\",\n    \"avg_ns\": 9918188143,\n    \"stddev_ns\": 5188327,\n    \"avg_ts\": 12.905585,\n    \"stddev_ts\": 0.006751,\n    \"samples_ns\": [ 9912200877, 9921297458, 9921066096 ],\n    \"samples_ts\": [ 12.9134, 12.9015, 12.9018 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:13:24Z\",\n    \"avg_ns\": 180532558823,\n    \"stddev_ns\": 149693998,\n    \"avg_ts\": 2.836055,\n    \"stddev_ts\": 0.002351,\n    \"samples_ns\": [ 180389955825, 180519263738, 180688456906 ],\n    \"samples_ts\": [ 2.8383, 2.83626, 2.83361 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:12:44Z",
+          "avg_ns": 9918188143,
+          "stddev_ns": 5188327,
+          "avg_ts": 12.905585,
+          "stddev_ts": 0.006751,
+          "samples_ns": [
+            9912200877,
+            9921297458,
+            9921066096
+          ],
+          "samples_ts": [
+            12.9134,
+            12.9015,
+            12.9018
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:13:24Z",
+          "avg_ns": 180532558823,
+          "stddev_ns": 149693998,
+          "avg_ts": 2.836055,
+          "stddev_ts": 0.002351,
+          "samples_ns": [
+            180389955825,
+            180519263738,
+            180688456906
+          ],
+          "samples_ts": [
+            2.8383,
+            2.83626,
+            2.83361
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1081
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:27:21.115244+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:22:27Z\",\n    \"avg_ns\": 39705243329,\n    \"stddev_ns\": 2974399,\n    \"avg_ts\": 12.895022,\n    \"stddev_ts\": 0.000962,\n    \"samples_ns\": [ 39701824242, 39706948175, 39706957572 ],\n    \"samples_ts\": [ 12.8961, 12.8945, 12.8945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:25:06Z\",\n    \"avg_ns\": 44813837328,\n    \"stddev_ns\": 25199822,\n    \"avg_ts\": 2.856261,\n    \"stddev_ts\": 0.001606,\n    \"samples_ns\": [ 44800902919, 44842877899, 44797731166 ],\n    \"samples_ts\": [ 2.85709, 2.85441, 2.85729 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:22:27Z",
+          "avg_ns": 39705243329,
+          "stddev_ns": 2974399,
+          "avg_ts": 12.895022,
+          "stddev_ts": 0.000962,
+          "samples_ns": [
+            39701824242,
+            39706948175,
+            39706957572
+          ],
+          "samples_ts": [
+            12.8961,
+            12.8945,
+            12.8945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:25:06Z",
+          "avg_ns": 44813837328,
+          "stddev_ns": 25199822,
+          "avg_ts": 2.856261,
+          "stddev_ts": 0.001606,
+          "samples_ns": [
+            44800902919,
+            44842877899,
+            44797731166
+          ],
+          "samples_ts": [
+            2.85709,
+            2.85441,
+            2.85729
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1082
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:39:03.559069+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:27:22Z\",\n    \"avg_ns\": 39805074016,\n    \"stddev_ns\": 19133789,\n    \"avg_ts\": 12.862684,\n    \"stddev_ts\": 0.006184,\n    \"samples_ns\": [ 39783068758, 39814388832, 39817764460 ],\n    \"samples_ts\": [ 12.8698, 12.8597, 12.8586 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:30:01Z\",\n    \"avg_ns\": 180529805679,\n    \"stddev_ns\": 82107790,\n    \"avg_ts\": 2.836097,\n    \"stddev_ts\": 0.001290,\n    \"samples_ns\": [ 180597903934, 180438631260, 180552881845 ],\n    \"samples_ts\": [ 2.83503, 2.83753, 2.83573 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:27:22Z",
+          "avg_ns": 39805074016,
+          "stddev_ns": 19133789,
+          "avg_ts": 12.862684,
+          "stddev_ts": 0.006184,
+          "samples_ns": [
+            39783068758,
+            39814388832,
+            39817764460
+          ],
+          "samples_ts": [
+            12.8698,
+            12.8597,
+            12.8586
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:30:01Z",
+          "avg_ns": 180529805679,
+          "stddev_ns": 82107790,
+          "avg_ts": 2.836097,
+          "stddev_ts": 0.00129,
+          "samples_ns": [
+            180597903934,
+            180438631260,
+            180552881845
+          ],
+          "samples_ts": [
+            2.83503,
+            2.83753,
+            2.83573
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1083
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:41:59.094064+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:39:04Z\",\n    \"avg_ns\": 9905394675,\n    \"stddev_ns\": 8325195,\n    \"avg_ts\": 12.922257,\n    \"stddev_ts\": 0.010865,\n    \"samples_ns\": [ 9909760025, 9910628655, 9895795346 ],\n    \"samples_ts\": [ 12.9166, 12.9154, 12.9348 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:39:44Z\",\n    \"avg_ns\": 44770809575,\n    \"stddev_ns\": 75632968,\n    \"avg_ts\": 2.859011,\n    \"stddev_ts\": 0.004825,\n    \"samples_ns\": [ 44727623725, 44726664096, 44858140905 ],\n    \"samples_ts\": [ 2.86177, 2.86183, 2.85344 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:39:04Z",
+          "avg_ns": 9905394675,
+          "stddev_ns": 8325195,
+          "avg_ts": 12.922257,
+          "stddev_ts": 0.010865,
+          "samples_ns": [
+            9909760025,
+            9910628655,
+            9895795346
+          ],
+          "samples_ts": [
+            12.9166,
+            12.9154,
+            12.9348
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:39:44Z",
+          "avg_ns": 44770809575,
+          "stddev_ns": 75632968,
+          "avg_ts": 2.859011,
+          "stddev_ts": 0.004825,
+          "samples_ns": [
+            44727623725,
+            44726664096,
+            44858140905
+          ],
+          "samples_ts": [
+            2.86177,
+            2.86183,
+            2.85344
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1084
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:51:43.727757+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:42:00Z\",\n    \"avg_ns\": 9902990626,\n    \"stddev_ns\": 5193890,\n    \"avg_ts\": 12.925391,\n    \"stddev_ts\": 0.006776,\n    \"samples_ns\": [ 9900724758, 9908931468, 9899315653 ],\n    \"samples_ts\": [ 12.9283, 12.9176, 12.9302 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:42:39Z\",\n    \"avg_ns\": 181144448666,\n    \"stddev_ns\": 65257951,\n    \"avg_ts\": 2.826474,\n    \"stddev_ts\": 0.001018,\n    \"samples_ns\": [ 181122835836, 181092741867, 181217768297 ],\n    \"samples_ts\": [ 2.82681, 2.82728, 2.82533 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:42:00Z",
+          "avg_ns": 9902990626,
+          "stddev_ns": 5193890,
+          "avg_ts": 12.925391,
+          "stddev_ts": 0.006776,
+          "samples_ns": [
+            9900724758,
+            9908931468,
+            9899315653
+          ],
+          "samples_ts": [
+            12.9283,
+            12.9176,
+            12.9302
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:42:39Z",
+          "avg_ns": 181144448666,
+          "stddev_ns": 65257951,
+          "avg_ts": 2.826474,
+          "stddev_ts": 0.001018,
+          "samples_ns": [
+            181122835836,
+            181092741867,
+            181217768297
+          ],
+          "samples_ts": [
+            2.82681,
+            2.82728,
+            2.82533
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1085
+    },
+    {
+      "timestamp_utc": "2025-12-10T18:56:44.779415+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:51:44Z\",\n    \"avg_ns\": 39919427080,\n    \"stddev_ns\": 132995758,\n    \"avg_ts\": 12.825930,\n    \"stddev_ts\": 0.042810,\n    \"samples_ns\": [ 39766477796, 40007849909, 39983953535 ],\n    \"samples_ts\": [ 12.8752, 12.7975, 12.8051 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:54:24Z\",\n    \"avg_ns\": 46592347001,\n    \"stddev_ns\": 19262325,\n    \"avg_ts\": 2.747233,\n    \"stddev_ts\": 0.001136,\n    \"samples_ns\": [ 46573243298, 46592035943, 46611761763 ],\n    \"samples_ts\": [ 2.74836, 2.74725, 2.74609 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:51:44Z",
+          "avg_ns": 39919427080,
+          "stddev_ns": 132995758,
+          "avg_ts": 12.82593,
+          "stddev_ts": 0.04281,
+          "samples_ns": [
+            39766477796,
+            40007849909,
+            39983953535
+          ],
+          "samples_ts": [
+            12.8752,
+            12.7975,
+            12.8051
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:54:24Z",
+          "avg_ns": 46592347001,
+          "stddev_ns": 19262325,
+          "avg_ts": 2.747233,
+          "stddev_ts": 0.001136,
+          "samples_ns": [
+            46573243298,
+            46592035943,
+            46611761763
+          ],
+          "samples_ts": [
+            2.74836,
+            2.74725,
+            2.74609
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1086
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:08:50.264544+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:56:45Z\",\n    \"avg_ns\": 40052685033,\n    \"stddev_ns\": 41309116,\n    \"avg_ts\": 12.783172,\n    \"stddev_ts\": 0.013177,\n    \"samples_ns\": [ 40036517451, 40021905990, 40099631660 ],\n    \"samples_ts\": [ 12.7883, 12.793, 12.7682 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T18:59:25Z\",\n    \"avg_ns\": 187982414846,\n    \"stddev_ns\": 159267568,\n    \"avg_ts\": 2.723660,\n    \"stddev_ts\": 0.002308,\n    \"samples_ns\": [ 188015527409, 188122523261, 187809193868 ],\n    \"samples_ts\": [ 2.72318, 2.72163, 2.72617 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:56:45Z",
+          "avg_ns": 40052685033,
+          "stddev_ns": 41309116,
+          "avg_ts": 12.783172,
+          "stddev_ts": 0.013177,
+          "samples_ns": [
+            40036517451,
+            40021905990,
+            40099631660
+          ],
+          "samples_ts": [
+            12.7883,
+            12.793,
+            12.7682
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T18:59:25Z",
+          "avg_ns": 187982414846,
+          "stddev_ns": 159267568,
+          "avg_ts": 2.72366,
+          "stddev_ts": 0.002308,
+          "samples_ns": [
+            188015527409,
+            188122523261,
+            187809193868
+          ],
+          "samples_ts": [
+            2.72318,
+            2.72163,
+            2.72617
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1087
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:11:49.335628+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:08:51Z\",\n    \"avg_ns\": 9881794958,\n    \"stddev_ns\": 3358176,\n    \"avg_ts\": 12.953113,\n    \"stddev_ts\": 0.004401,\n    \"samples_ns\": [ 9884641282, 9878093354, 9882650239 ],\n    \"samples_ts\": [ 12.9494, 12.958, 12.952 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:09:30Z\",\n    \"avg_ns\": 45979465034,\n    \"stddev_ns\": 328441418,\n    \"avg_ts\": 2.783946,\n    \"stddev_ts\": 0.019814,\n    \"samples_ns\": [ 45843447207, 46354064940, 45740882957 ],\n    \"samples_ts\": [ 2.79211, 2.76135, 2.79837 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:08:51Z",
+          "avg_ns": 9881794958,
+          "stddev_ns": 3358176,
+          "avg_ts": 12.953113,
+          "stddev_ts": 0.004401,
+          "samples_ns": [
+            9884641282,
+            9878093354,
+            9882650239
+          ],
+          "samples_ts": [
+            12.9494,
+            12.958,
+            12.952
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:09:30Z",
+          "avg_ns": 45979465034,
+          "stddev_ns": 328441418,
+          "avg_ts": 2.783946,
+          "stddev_ts": 0.019814,
+          "samples_ns": [
+            45843447207,
+            46354064940,
+            45740882957
+          ],
+          "samples_ts": [
+            2.79211,
+            2.76135,
+            2.79837
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1088
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:21:45.562281+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:11:50Z\",\n    \"avg_ns\": 9873665212,\n    \"stddev_ns\": 7815530,\n    \"avg_ts\": 12.963783,\n    \"stddev_ts\": 0.010262,\n    \"samples_ns\": [ 9881393885, 9873836123, 9865765628 ],\n    \"samples_ts\": [ 12.9536, 12.9636, 12.9742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:12:29Z\",\n    \"avg_ns\": 185071929385,\n    \"stddev_ns\": 1577744239,\n    \"avg_ts\": 2.766627,\n    \"stddev_ts\": 0.023695,\n    \"samples_ns\": [ 186164397643, 183263094160, 185788296353 ],\n    \"samples_ts\": [ 2.75026, 2.7938, 2.75582 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:11:50Z",
+          "avg_ns": 9873665212,
+          "stddev_ns": 7815530,
+          "avg_ts": 12.963783,
+          "stddev_ts": 0.010262,
+          "samples_ns": [
+            9881393885,
+            9873836123,
+            9865765628
+          ],
+          "samples_ts": [
+            12.9536,
+            12.9636,
+            12.9742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:12:29Z",
+          "avg_ns": 185071929385,
+          "stddev_ns": 1577744239,
+          "avg_ts": 2.766627,
+          "stddev_ts": 0.023695,
+          "samples_ns": [
+            186164397643,
+            183263094160,
+            185788296353
+          ],
+          "samples_ts": [
+            2.75026,
+            2.7938,
+            2.75582
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1089
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:26:50.177622+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:21:46Z\",\n    \"avg_ns\": 40803498537,\n    \"stddev_ns\": 30580561,\n    \"avg_ts\": 12.547948,\n    \"stddev_ts\": 0.009404,\n    \"samples_ns\": [ 40773943444, 40835010572, 40801541595 ],\n    \"samples_ts\": [ 12.557, 12.5383, 12.5485 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:24:29Z\",\n    \"avg_ns\": 46606488806,\n    \"stddev_ns\": 10099699,\n    \"avg_ts\": 2.746399,\n    \"stddev_ts\": 0.000595,\n    \"samples_ns\": [ 46618150689, 46600592358, 46600723371 ],\n    \"samples_ts\": [ 2.74571, 2.74675, 2.74674 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:21:46Z",
+          "avg_ns": 40803498537,
+          "stddev_ns": 30580561,
+          "avg_ts": 12.547948,
+          "stddev_ts": 0.009404,
+          "samples_ns": [
+            40773943444,
+            40835010572,
+            40801541595
+          ],
+          "samples_ts": [
+            12.557,
+            12.5383,
+            12.5485
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:24:29Z",
+          "avg_ns": 46606488806,
+          "stddev_ns": 10099699,
+          "avg_ts": 2.746399,
+          "stddev_ts": 0.000595,
+          "samples_ns": [
+            46618150689,
+            46600592358,
+            46600723371
+          ],
+          "samples_ts": [
+            2.74571,
+            2.74675,
+            2.74674
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1090
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:38:47.027002+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:26:51Z\",\n    \"avg_ns\": 40881721120,\n    \"stddev_ns\": 15842058,\n    \"avg_ts\": 12.523936,\n    \"stddev_ts\": 0.004852,\n    \"samples_ns\": [ 40877950633, 40899105610, 40868107119 ],\n    \"samples_ts\": [ 12.5251, 12.5186, 12.5281 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:29:34Z\",\n    \"avg_ns\": 183903835845,\n    \"stddev_ns\": 2043730870,\n    \"avg_ts\": 2.784800,\n    \"stddev_ts\": 0.055460,\n    \"samples_ns\": [ 187545596871, 183941285330, 180224625334 ],\n    \"samples_ts\": [ 2.73, 2.7835, 2.8409 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:26:51Z",
+          "avg_ns": 40881721120,
+          "stddev_ns": 15842058,
+          "avg_ts": 12.523936,
+          "stddev_ts": 0.004852,
+          "samples_ns": [
+            40877950633,
+            40899105610,
+            40868107119
+          ],
+          "samples_ts": [
+            12.5251,
+            12.5186,
+            12.5281
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:29:34Z",
+          "avg_ns": 183903835845,
+          "stddev_ns": 2043730870,
+          "avg_ts": 2.7848,
+          "stddev_ts": 0.05546,
+          "samples_ns": [
+            187545596871,
+            183941285330,
+            180224625334
+          ],
+          "samples_ts": [
+            2.73,
+            2.7835,
+            2.8409
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1091
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:41:42.512216+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:38:48Z\",\n    \"avg_ns\": 9890321427,\n    \"stddev_ns\": 1201141,\n    \"avg_ts\": 12.941946,\n    \"stddev_ts\": 0.001566,\n    \"samples_ns\": [ 9888988941, 9890669535, 9891305806 ],\n    \"samples_ts\": [ 12.9437, 12.9415, 12.9407 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:39:27Z\",\n    \"avg_ns\": 44775874886,\n    \"stddev_ns\": 15458871,\n    \"avg_ts\": 2.858683,\n    \"stddev_ts\": 0.000987,\n    \"samples_ns\": [ 44766422113, 44767489609, 44793712937 ],\n    \"samples_ts\": [ 2.85929, 2.85922, 2.85754 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:38:48Z",
+          "avg_ns": 9890321427,
+          "stddev_ns": 1201141,
+          "avg_ts": 12.941946,
+          "stddev_ts": 0.001566,
+          "samples_ns": [
+            9888988941,
+            9890669535,
+            9891305806
+          ],
+          "samples_ts": [
+            12.9437,
+            12.9415,
+            12.9407
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:39:27Z",
+          "avg_ns": 44775874886,
+          "stddev_ns": 15458871,
+          "avg_ts": 2.858683,
+          "stddev_ts": 0.000987,
+          "samples_ns": [
+            44766422113,
+            44767489609,
+            44793712937
+          ],
+          "samples_ts": [
+            2.85929,
+            2.85922,
+            2.85754
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1092
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:51:25.601655+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:41:43Z\",\n    \"avg_ns\": 9887525609,\n    \"stddev_ns\": 3875284,\n    \"avg_ts\": 12.945606,\n    \"stddev_ts\": 0.005074,\n    \"samples_ns\": [ 9887203348, 9891551961, 9883821518 ],\n    \"samples_ts\": [ 12.946, 12.9403, 12.9505 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:42:23Z\",\n    \"avg_ns\": 180628328521,\n    \"stddev_ns\": 105898443,\n    \"avg_ts\": 2.834550,\n    \"stddev_ts\": 0.001662,\n    \"samples_ns\": [ 180626794559, 180523205391, 180734985613 ],\n    \"samples_ts\": [ 2.83457, 2.8362, 2.83288 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:41:43Z",
+          "avg_ns": 9887525609,
+          "stddev_ns": 3875284,
+          "avg_ts": 12.945606,
+          "stddev_ts": 0.005074,
+          "samples_ns": [
+            9887203348,
+            9891551961,
+            9883821518
+          ],
+          "samples_ts": [
+            12.946,
+            12.9403,
+            12.9505
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:42:23Z",
+          "avg_ns": 180628328521,
+          "stddev_ns": 105898443,
+          "avg_ts": 2.83455,
+          "stddev_ts": 0.001662,
+          "samples_ns": [
+            180626794559,
+            180523205391,
+            180734985613
+          ],
+          "samples_ts": [
+            2.83457,
+            2.8362,
+            2.83288
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1093
+    },
+    {
+      "timestamp_utc": "2025-12-10T19:56:20.314463+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:51:26Z\",\n    \"avg_ns\": 39706549617,\n    \"stddev_ns\": 6809119,\n    \"avg_ts\": 12.894598,\n    \"stddev_ts\": 0.002210,\n    \"samples_ns\": [ 39712803298, 39707545035, 39699300519 ],\n    \"samples_ts\": [ 12.8926, 12.8943, 12.897 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:54:05Z\",\n    \"avg_ns\": 44793349652,\n    \"stddev_ns\": 20829348,\n    \"avg_ts\": 2.857567,\n    \"stddev_ts\": 0.001329,\n    \"samples_ns\": [ 44802861494, 44807724999, 44769462463 ],\n    \"samples_ts\": [ 2.85696, 2.85665, 2.85909 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:51:26Z",
+          "avg_ns": 39706549617,
+          "stddev_ns": 6809119,
+          "avg_ts": 12.894598,
+          "stddev_ts": 0.00221,
+          "samples_ns": [
+            39712803298,
+            39707545035,
+            39699300519
+          ],
+          "samples_ts": [
+            12.8926,
+            12.8943,
+            12.897
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:54:05Z",
+          "avg_ns": 44793349652,
+          "stddev_ns": 20829348,
+          "avg_ts": 2.857567,
+          "stddev_ts": 0.001329,
+          "samples_ns": [
+            44802861494,
+            44807724999,
+            44769462463
+          ],
+          "samples_ts": [
+            2.85696,
+            2.85665,
+            2.85909
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1094
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:08:02.019935+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:56:21Z\",\n    \"avg_ns\": 39696232478,\n    \"stddev_ns\": 22111459,\n    \"avg_ts\": 12.897952,\n    \"stddev_ts\": 0.007183,\n    \"samples_ns\": [ 39719616326, 39693413774, 39675667336 ],\n    \"samples_ts\": [ 12.8904, 12.8989, 12.9046 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T19:59:00Z\",\n    \"avg_ns\": 180434163925,\n    \"stddev_ns\": 42231225,\n    \"avg_ts\": 2.837600,\n    \"stddev_ts\": 0.000664,\n    \"samples_ns\": [ 180412556527, 180482824410, 180407110839 ],\n    \"samples_ts\": [ 2.83794, 2.83684, 2.83803 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:56:21Z",
+          "avg_ns": 39696232478,
+          "stddev_ns": 22111459,
+          "avg_ts": 12.897952,
+          "stddev_ts": 0.007183,
+          "samples_ns": [
+            39719616326,
+            39693413774,
+            39675667336
+          ],
+          "samples_ts": [
+            12.8904,
+            12.8989,
+            12.9046
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T19:59:00Z",
+          "avg_ns": 180434163925,
+          "stddev_ns": 42231225,
+          "avg_ts": 2.8376,
+          "stddev_ts": 0.000664,
+          "samples_ns": [
+            180412556527,
+            180482824410,
+            180407110839
+          ],
+          "samples_ts": [
+            2.83794,
+            2.83684,
+            2.83803
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1095
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:10:57.610002+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:08:03Z\",\n    \"avg_ns\": 9858579162,\n    \"stddev_ns\": 51347485,\n    \"avg_ts\": 12.983851,\n    \"stddev_ts\": 0.067828,\n    \"samples_ns\": [ 9799315002, 9886673720, 9889748766 ],\n    \"samples_ts\": [ 13.0621, 12.9467, 12.9427 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:08:42Z\",\n    \"avg_ns\": 44851255090,\n    \"stddev_ns\": 13115239,\n    \"avg_ts\": 2.853878,\n    \"stddev_ts\": 0.000834,\n    \"samples_ns\": [ 44854283336, 44836894097, 44862587839 ],\n    \"samples_ts\": [ 2.85369, 2.85479, 2.85316 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:08:03Z",
+          "avg_ns": 9858579162,
+          "stddev_ns": 51347485,
+          "avg_ts": 12.983851,
+          "stddev_ts": 0.067828,
+          "samples_ns": [
+            9799315002,
+            9886673720,
+            9889748766
+          ],
+          "samples_ts": [
+            13.0621,
+            12.9467,
+            12.9427
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:08:42Z",
+          "avg_ns": 44851255090,
+          "stddev_ns": 13115239,
+          "avg_ts": 2.853878,
+          "stddev_ts": 0.000834,
+          "samples_ns": [
+            44854283336,
+            44836894097,
+            44862587839
+          ],
+          "samples_ts": [
+            2.85369,
+            2.85479,
+            2.85316
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1096
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:20:42.156222+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:10:58Z\",\n    \"avg_ns\": 9917002915,\n    \"stddev_ns\": 7883318,\n    \"avg_ts\": 12.907131,\n    \"stddev_ts\": 0.010256,\n    \"samples_ns\": [ 9910409850, 9925734097, 9914864799 ],\n    \"samples_ts\": [ 12.9157, 12.8958, 12.9099 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:11:38Z\",\n    \"avg_ns\": 181092217449,\n    \"stddev_ns\": 175286000,\n    \"avg_ts\": 2.827291,\n    \"stddev_ts\": 0.002737,\n    \"samples_ns\": [ 180916922456, 181092235435, 181267494456 ],\n    \"samples_ts\": [ 2.83003, 2.82729, 2.82455 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:10:58Z",
+          "avg_ns": 9917002915,
+          "stddev_ns": 7883318,
+          "avg_ts": 12.907131,
+          "stddev_ts": 0.010256,
+          "samples_ns": [
+            9910409850,
+            9925734097,
+            9914864799
+          ],
+          "samples_ts": [
+            12.9157,
+            12.8958,
+            12.9099
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:11:38Z",
+          "avg_ns": 181092217449,
+          "stddev_ns": 175286000,
+          "avg_ts": 2.827291,
+          "stddev_ts": 0.002737,
+          "samples_ns": [
+            180916922456,
+            181092235435,
+            181267494456
+          ],
+          "samples_ts": [
+            2.83003,
+            2.82729,
+            2.82455
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1097
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:25:40.303181+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:20:43Z\",\n    \"avg_ns\": 39985149562,\n    \"stddev_ns\": 75726912,\n    \"avg_ts\": 12.804785,\n    \"stddev_ts\": 0.024277,\n    \"samples_ns\": [ 39897938886, 40023251383, 40034258417 ],\n    \"samples_ts\": [ 12.8327, 12.7926, 12.789 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:23:22Z\",\n    \"avg_ns\": 45642497697,\n    \"stddev_ns\": 778460506,\n    \"avg_ts\": 2.804948,\n    \"stddev_ts\": 0.047841,\n    \"samples_ns\": [ 46422778541, 45638844103, 44865870448 ],\n    \"samples_ts\": [ 2.75727, 2.80463, 2.85295 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:20:43Z",
+          "avg_ns": 39985149562,
+          "stddev_ns": 75726912,
+          "avg_ts": 12.804785,
+          "stddev_ts": 0.024277,
+          "samples_ns": [
+            39897938886,
+            40023251383,
+            40034258417
+          ],
+          "samples_ts": [
+            12.8327,
+            12.7926,
+            12.789
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:23:22Z",
+          "avg_ns": 45642497697,
+          "stddev_ns": 778460506,
+          "avg_ts": 2.804948,
+          "stddev_ts": 0.047841,
+          "samples_ns": [
+            46422778541,
+            45638844103,
+            44865870448
+          ],
+          "samples_ts": [
+            2.75727,
+            2.80463,
+            2.85295
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1098
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:37:26.030362+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:25:41Z\",\n    \"avg_ns\": 40036897594,\n    \"stddev_ns\": 72110855,\n    \"avg_ts\": 12.788231,\n    \"stddev_ts\": 0.023057,\n    \"samples_ns\": [ 39953633109, 40078124294, 40078935381 ],\n    \"samples_ts\": [ 12.8149, 12.775, 12.7748 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:28:21Z\",\n    \"avg_ns\": 181325849289,\n    \"stddev_ns\": 1426599355,\n    \"avg_ts\": 2.823762,\n    \"stddev_ts\": 0.022120,\n    \"samples_ns\": [ 182965461877, 180643659426, 180368426566 ],\n    \"samples_ts\": [ 2.79834, 2.83431, 2.83863 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:25:41Z",
+          "avg_ns": 40036897594,
+          "stddev_ns": 72110855,
+          "avg_ts": 12.788231,
+          "stddev_ts": 0.023057,
+          "samples_ns": [
+            39953633109,
+            40078124294,
+            40078935381
+          ],
+          "samples_ts": [
+            12.8149,
+            12.775,
+            12.7748
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:28:21Z",
+          "avg_ns": 181325849289,
+          "stddev_ns": 1426599355,
+          "avg_ts": 2.823762,
+          "stddev_ts": 0.02212,
+          "samples_ns": [
+            182965461877,
+            180643659426,
+            180368426566
+          ],
+          "samples_ts": [
+            2.79834,
+            2.83431,
+            2.83863
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1099
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:40:22.626547+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:37:27Z\",\n    \"avg_ns\": 9891760419,\n    \"stddev_ns\": 1085543,\n    \"avg_ts\": 12.940063,\n    \"stddev_ts\": 0.001420,\n    \"samples_ns\": [ 9892786247, 9891871334, 9890623676 ],\n    \"samples_ts\": [ 12.9387, 12.9399, 12.9415 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:38:06Z\",\n    \"avg_ns\": 45149919664,\n    \"stddev_ns\": 377057526,\n    \"avg_ts\": 2.835131,\n    \"stddev_ts\": 0.023565,\n    \"samples_ns\": [ 44913235623, 45584738906, 44951784464 ],\n    \"samples_ts\": [ 2.84994, 2.80796, 2.8475 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:37:27Z",
+          "avg_ns": 9891760419,
+          "stddev_ns": 1085543,
+          "avg_ts": 12.940063,
+          "stddev_ts": 0.00142,
+          "samples_ns": [
+            9892786247,
+            9891871334,
+            9890623676
+          ],
+          "samples_ts": [
+            12.9387,
+            12.9399,
+            12.9415
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:38:06Z",
+          "avg_ns": 45149919664,
+          "stddev_ns": 377057526,
+          "avg_ts": 2.835131,
+          "stddev_ts": 0.023565,
+          "samples_ns": [
+            44913235623,
+            45584738906,
+            44951784464
+          ],
+          "samples_ts": [
+            2.84994,
+            2.80796,
+            2.8475
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1100
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:50:07.838760+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:40:23Z\",\n    \"avg_ns\": 9897756768,\n    \"stddev_ns\": 3452844,\n    \"avg_ts\": 12.932224,\n    \"stddev_ts\": 0.004512,\n    \"samples_ns\": [ 9900192616, 9893805326, 9899272362 ],\n    \"samples_ts\": [ 12.929, 12.9374, 12.9302 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:41:03Z\",\n    \"avg_ns\": 181341514796,\n    \"stddev_ns\": 1283895578,\n    \"avg_ts\": 2.823496,\n    \"stddev_ts\": 0.019917,\n    \"samples_ns\": [ 182807807114, 180797783484, 180418953790 ],\n    \"samples_ts\": [ 2.80076, 2.83189, 2.83784 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:40:23Z",
+          "avg_ns": 9897756768,
+          "stddev_ns": 3452844,
+          "avg_ts": 12.932224,
+          "stddev_ts": 0.004512,
+          "samples_ns": [
+            9900192616,
+            9893805326,
+            9899272362
+          ],
+          "samples_ts": [
+            12.929,
+            12.9374,
+            12.9302
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:41:03Z",
+          "avg_ns": 181341514796,
+          "stddev_ns": 1283895578,
+          "avg_ts": 2.823496,
+          "stddev_ts": 0.019917,
+          "samples_ns": [
+            182807807114,
+            180797783484,
+            180418953790
+          ],
+          "samples_ts": [
+            2.80076,
+            2.83189,
+            2.83784
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1101
+    },
+    {
+      "timestamp_utc": "2025-12-10T20:55:12.265282+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:50:08Z\",\n    \"avg_ns\": 40736639033,\n    \"stddev_ns\": 80006691,\n    \"avg_ts\": 12.568570,\n    \"stddev_ts\": 0.024706,\n    \"samples_ns\": [ 40646812374, 40800244833, 40762859893 ],\n    \"samples_ts\": [ 12.5963, 12.5489, 12.5605 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:52:51Z\",\n    \"avg_ns\": 46609789527,\n    \"stddev_ns\": 9124333,\n    \"avg_ts\": 2.746204,\n    \"stddev_ts\": 0.000538,\n    \"samples_ns\": [ 46610402615, 46618591855, 46600374111 ],\n    \"samples_ts\": [ 2.74617, 2.74569, 2.74676 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:50:08Z",
+          "avg_ns": 40736639033,
+          "stddev_ns": 80006691,
+          "avg_ts": 12.56857,
+          "stddev_ts": 0.024706,
+          "samples_ns": [
+            40646812374,
+            40800244833,
+            40762859893
+          ],
+          "samples_ts": [
+            12.5963,
+            12.5489,
+            12.5605
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:52:51Z",
+          "avg_ns": 46609789527,
+          "stddev_ns": 9124333,
+          "avg_ts": 2.746204,
+          "stddev_ts": 0.000538,
+          "samples_ns": [
+            46610402615,
+            46618591855,
+            46600374111
+          ],
+          "samples_ts": [
+            2.74617,
+            2.74569,
+            2.74676
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1102
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:07:07.095696+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:55:13Z\",\n    \"avg_ns\": 40797012532,\n    \"stddev_ns\": 78787537,\n    \"avg_ts\": 12.549970,\n    \"stddev_ts\": 0.024262,\n    \"samples_ns\": [ 40833688016, 40850776508, 40706573072 ],\n    \"samples_ts\": [ 12.5387, 12.5334, 12.5778 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T20:57:56Z\",\n    \"avg_ns\": 183322326664,\n    \"stddev_ns\": 3766502433,\n    \"avg_ts\": 2.793673,\n    \"stddev_ts\": 0.056827,\n    \"samples_ns\": [ 187598237617, 181872833043, 180495909332 ],\n    \"samples_ts\": [ 2.72924, 2.81515, 2.83663 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:55:13Z",
+          "avg_ns": 40797012532,
+          "stddev_ns": 78787537,
+          "avg_ts": 12.54997,
+          "stddev_ts": 0.024262,
+          "samples_ns": [
+            40833688016,
+            40850776508,
+            40706573072
+          ],
+          "samples_ts": [
+            12.5387,
+            12.5334,
+            12.5778
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T20:57:56Z",
+          "avg_ns": 183322326664,
+          "stddev_ns": 3766502433,
+          "avg_ts": 2.793673,
+          "stddev_ts": 0.056827,
+          "samples_ns": [
+            187598237617,
+            181872833043,
+            180495909332
+          ],
+          "samples_ts": [
+            2.72924,
+            2.81515,
+            2.83663
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1103
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:10:02.913713+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:07:08Z\",\n    \"avg_ns\": 9890912587,\n    \"stddev_ns\": 6879372,\n    \"avg_ts\": 12.941176,\n    \"stddev_ts\": 0.009001,\n    \"samples_ns\": [ 9897429602, 9891586289, 9883721871 ],\n    \"samples_ts\": [ 12.9327, 12.9403, 12.9506 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:07:47Z\",\n    \"avg_ns\": 44867454119,\n    \"stddev_ns\": 15585183,\n    \"avg_ts\": 2.852848,\n    \"stddev_ts\": 0.000991,\n    \"samples_ns\": [ 44882564137, 44851433683, 44868364537 ],\n    \"samples_ts\": [ 2.85189, 2.85387, 2.85279 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:07:08Z",
+          "avg_ns": 9890912587,
+          "stddev_ns": 6879372,
+          "avg_ts": 12.941176,
+          "stddev_ts": 0.009001,
+          "samples_ns": [
+            9897429602,
+            9891586289,
+            9883721871
+          ],
+          "samples_ts": [
+            12.9327,
+            12.9403,
+            12.9506
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:07:47Z",
+          "avg_ns": 44867454119,
+          "stddev_ns": 15585183,
+          "avg_ts": 2.852848,
+          "stddev_ts": 0.000991,
+          "samples_ns": [
+            44882564137,
+            44851433683,
+            44868364537
+          ],
+          "samples_ts": [
+            2.85189,
+            2.85387,
+            2.85279
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1104
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:19:53.900853+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:10:04Z\",\n    \"avg_ns\": 9892808474,\n    \"stddev_ns\": 2594723,\n    \"avg_ts\": 12.938692,\n    \"stddev_ts\": 0.003389,\n    \"samples_ns\": [ 9894161859, 9889821140, 9894442425 ],\n    \"samples_ts\": [ 12.9369, 12.9426, 12.9366 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:10:43Z\",\n    \"avg_ns\": 183277282418,\n    \"stddev_ns\": 3385009939,\n    \"avg_ts\": 2.793705,\n    \"stddev_ts\": 0.022724,\n    \"samples_ns\": [ 182908584963, 184922099060, 182001163233 ],\n    \"samples_ts\": [ 2.79921, 2.76873, 2.81317 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:10:04Z",
+          "avg_ns": 9892808474,
+          "stddev_ns": 2594723,
+          "avg_ts": 12.938692,
+          "stddev_ts": 0.003389,
+          "samples_ns": [
+            9894161859,
+            9889821140,
+            9894442425
+          ],
+          "samples_ts": [
+            12.9369,
+            12.9426,
+            12.9366
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:10:43Z",
+          "avg_ns": 183277282418,
+          "stddev_ns": 3385009939,
+          "avg_ts": 2.793705,
+          "stddev_ts": 0.022724,
+          "samples_ns": [
+            182908584963,
+            184922099060,
+            182001163233
+          ],
+          "samples_ts": [
+            2.79921,
+            2.76873,
+            2.81317
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1105
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:24:50.953644+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:19:55Z\",\n    \"avg_ns\": 39797233522,\n    \"stddev_ns\": 14248592,\n    \"avg_ts\": 12.865217,\n    \"stddev_ts\": 0.004607,\n    \"samples_ns\": [ 39806203669, 39780803795, 39804693102 ],\n    \"samples_ts\": [ 12.8623, 12.8705, 12.8628 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:22:34Z\",\n    \"avg_ns\": 45423322834,\n    \"stddev_ns\": 717815685,\n    \"avg_ts\": 2.818402,\n    \"stddev_ts\": 0.044267,\n    \"samples_ns\": [ 46221294156, 45218478062, 44830196284 ],\n    \"samples_ts\": [ 2.76929, 2.8307, 2.85522 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:19:55Z",
+          "avg_ns": 39797233522,
+          "stddev_ns": 14248592,
+          "avg_ts": 12.865217,
+          "stddev_ts": 0.004607,
+          "samples_ns": [
+            39806203669,
+            39780803795,
+            39804693102
+          ],
+          "samples_ts": [
+            12.8623,
+            12.8705,
+            12.8628
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:22:34Z",
+          "avg_ns": 45423322834,
+          "stddev_ns": 717815685,
+          "avg_ts": 2.818402,
+          "stddev_ts": 0.044267,
+          "samples_ns": [
+            46221294156,
+            45218478062,
+            44830196284
+          ],
+          "samples_ts": [
+            2.76929,
+            2.8307,
+            2.85522
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1106
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:36:46.206498+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:24:52Z\",\n    \"avg_ns\": 39722806590,\n    \"stddev_ns\": 22549939,\n    \"avg_ts\": 12.889324,\n    \"stddev_ts\": 0.007316,\n    \"samples_ns\": [ 39746071042, 39721298313, 39701050417 ],\n    \"samples_ts\": [ 12.8818, 12.8898, 12.8964 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:27:31Z\",\n    \"avg_ns\": 184909228823,\n    \"stddev_ns\": 3747722146,\n    \"avg_ts\": 2.769694,\n    \"stddev_ts\": 0.056794,\n    \"samples_ns\": [ 187250032425, 186890953257, 180586700789 ],\n    \"samples_ts\": [ 2.73431, 2.73957, 2.8352 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:24:52Z",
+          "avg_ns": 39722806590,
+          "stddev_ns": 22549939,
+          "avg_ts": 12.889324,
+          "stddev_ts": 0.007316,
+          "samples_ns": [
+            39746071042,
+            39721298313,
+            39701050417
+          ],
+          "samples_ts": [
+            12.8818,
+            12.8898,
+            12.8964
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:27:31Z",
+          "avg_ns": 184909228823,
+          "stddev_ns": 3747722146,
+          "avg_ts": 2.769694,
+          "stddev_ts": 0.056794,
+          "samples_ns": [
+            187250032425,
+            186890953257,
+            180586700789
+          ],
+          "samples_ts": [
+            2.73431,
+            2.73957,
+            2.8352
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1107
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:39:42.175446+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:36:47Z\",\n    \"avg_ns\": 9913957223,\n    \"stddev_ns\": 3986954,\n    \"avg_ts\": 12.911092,\n    \"stddev_ts\": 0.005191,\n    \"samples_ns\": [ 9918387751, 9910658619, 9912825299 ],\n    \"samples_ts\": [ 12.9053, 12.9154, 12.9126 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:37:26Z\",\n    \"avg_ns\": 44904649761,\n    \"stddev_ns\": 44295715,\n    \"avg_ts\": 2.850486,\n    \"stddev_ts\": 0.002813,\n    \"samples_ns\": [ 44935155420, 44853841848, 44924952015 ],\n    \"samples_ts\": [ 2.84855, 2.85371, 2.8492 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:36:47Z",
+          "avg_ns": 9913957223,
+          "stddev_ns": 3986954,
+          "avg_ts": 12.911092,
+          "stddev_ts": 0.005191,
+          "samples_ns": [
+            9918387751,
+            9910658619,
+            9912825299
+          ],
+          "samples_ts": [
+            12.9053,
+            12.9154,
+            12.9126
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:37:26Z",
+          "avg_ns": 44904649761,
+          "stddev_ns": 44295715,
+          "avg_ts": 2.850486,
+          "stddev_ts": 0.002813,
+          "samples_ns": [
+            44935155420,
+            44853841848,
+            44924952015
+          ],
+          "samples_ts": [
+            2.84855,
+            2.85371,
+            2.8492
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1108
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:49:24.721268+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:39:43Z\",\n    \"avg_ns\": 9769493461,\n    \"stddev_ns\": 13892219,\n    \"avg_ts\": 13.102028,\n    \"stddev_ts\": 0.018615,\n    \"samples_ns\": [ 9761673678, 9761273957, 9785532749 ],\n    \"samples_ts\": [ 13.1125, 13.113, 13.0805 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:40:22Z\",\n    \"avg_ns\": 180622836513,\n    \"stddev_ns\": 120552861,\n    \"avg_ts\": 2.834637,\n    \"stddev_ts\": 0.001891,\n    \"samples_ns\": [ 180758425456, 180582325010, 180527759075 ],\n    \"samples_ts\": [ 2.83251, 2.83527, 2.83613 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:39:43Z",
+          "avg_ns": 9769493461,
+          "stddev_ns": 13892219,
+          "avg_ts": 13.102028,
+          "stddev_ts": 0.018615,
+          "samples_ns": [
+            9761673678,
+            9761273957,
+            9785532749
+          ],
+          "samples_ts": [
+            13.1125,
+            13.113,
+            13.0805
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:40:22Z",
+          "avg_ns": 180622836513,
+          "stddev_ns": 120552861,
+          "avg_ts": 2.834637,
+          "stddev_ts": 0.001891,
+          "samples_ns": [
+            180758425456,
+            180582325010,
+            180527759075
+          ],
+          "samples_ts": [
+            2.83251,
+            2.83527,
+            2.83613
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1109
+    },
+    {
+      "timestamp_utc": "2025-12-10T21:54:26.257745+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:49:25Z\",\n    \"avg_ns\": 39977597271,\n    \"stddev_ns\": 9051351,\n    \"avg_ts\": 12.807173,\n    \"stddev_ts\": 0.002899,\n    \"samples_ns\": [ 39988039463, 39972703990, 39972048361 ],\n    \"samples_ts\": [ 12.8038, 12.8087, 12.809 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:52:06Z\",\n    \"avg_ns\": 46546950939,\n    \"stddev_ns\": 23167244,\n    \"avg_ts\": 2.749912,\n    \"stddev_ts\": 0.001369,\n    \"samples_ns\": [ 46523639849, 46547245459, 46569967511 ],\n    \"samples_ts\": [ 2.75129, 2.74989, 2.74855 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:49:25Z",
+          "avg_ns": 39977597271,
+          "stddev_ns": 9051351,
+          "avg_ts": 12.807173,
+          "stddev_ts": 0.002899,
+          "samples_ns": [
+            39988039463,
+            39972703990,
+            39972048361
+          ],
+          "samples_ts": [
+            12.8038,
+            12.8087,
+            12.809
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:52:06Z",
+          "avg_ns": 46546950939,
+          "stddev_ns": 23167244,
+          "avg_ts": 2.749912,
+          "stddev_ts": 0.001369,
+          "samples_ns": [
+            46523639849,
+            46547245459,
+            46569967511
+          ],
+          "samples_ts": [
+            2.75129,
+            2.74989,
+            2.74855
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1110
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:06:30.856464+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:54:27Z\",\n    \"avg_ns\": 40008911334,\n    \"stddev_ns\": 4544139,\n    \"avg_ts\": 12.797149,\n    \"stddev_ts\": 0.001451,\n    \"samples_ns\": [ 40008980369, 40004341885, 40013411750 ],\n    \"samples_ts\": [ 12.7971, 12.7986, 12.7957 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T21:57:07Z\",\n    \"avg_ns\": 187623823130,\n    \"stddev_ns\": 55845387,\n    \"avg_ts\": 2.728865,\n    \"stddev_ts\": 0.000812,\n    \"samples_ns\": [ 187559375560, 187657891540, 187654202291 ],\n    \"samples_ts\": [ 2.7298, 2.72837, 2.72842 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:54:27Z",
+          "avg_ns": 40008911334,
+          "stddev_ns": 4544139,
+          "avg_ts": 12.797149,
+          "stddev_ts": 0.001451,
+          "samples_ns": [
+            40008980369,
+            40004341885,
+            40013411750
+          ],
+          "samples_ts": [
+            12.7971,
+            12.7986,
+            12.7957
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T21:57:07Z",
+          "avg_ns": 187623823130,
+          "stddev_ns": 55845387,
+          "avg_ts": 2.728865,
+          "stddev_ts": 0.000812,
+          "samples_ns": [
+            187559375560,
+            187657891540,
+            187654202291
+          ],
+          "samples_ts": [
+            2.7298,
+            2.72837,
+            2.72842
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1111
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:09:31.381479+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:06:31Z\",\n    \"avg_ns\": 9887829694,\n    \"stddev_ns\": 7032522,\n    \"avg_ts\": 12.945211,\n    \"stddev_ts\": 0.009207,\n    \"samples_ns\": [ 9880512336, 9888440639, 9894536108 ],\n    \"samples_ts\": [ 12.9548, 12.9444, 12.9364 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:07:11Z\",\n    \"avg_ns\": 46459027202,\n    \"stddev_ns\": 277464235,\n    \"avg_ts\": 2.755182,\n    \"stddev_ts\": 0.016511,\n    \"samples_ns\": [ 46138920516, 46630705415, 46607455676 ],\n    \"samples_ts\": [ 2.77423, 2.74497, 2.74634 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:06:31Z",
+          "avg_ns": 9887829694,
+          "stddev_ns": 7032522,
+          "avg_ts": 12.945211,
+          "stddev_ts": 0.009207,
+          "samples_ns": [
+            9880512336,
+            9888440639,
+            9894536108
+          ],
+          "samples_ts": [
+            12.9548,
+            12.9444,
+            12.9364
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:07:11Z",
+          "avg_ns": 46459027202,
+          "stddev_ns": 277464235,
+          "avg_ts": 2.755182,
+          "stddev_ts": 0.016511,
+          "samples_ns": [
+            46138920516,
+            46630705415,
+            46607455676
+          ],
+          "samples_ts": [
+            2.77423,
+            2.74497,
+            2.74634
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1112
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:19:30.134798+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:09:32Z\",\n    \"avg_ns\": 9885792275,\n    \"stddev_ns\": 4331087,\n    \"avg_ts\": 12.947876,\n    \"stddev_ts\": 0.005670,\n    \"samples_ns\": [ 9883493095, 9890786831, 9883096900 ],\n    \"samples_ts\": [ 12.9509, 12.9413, 12.9514 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:10:12Z\",\n    \"avg_ns\": 185864118159,\n    \"stddev_ns\": 2266745945,\n    \"avg_ts\": 2.754975,\n    \"stddev_ts\": 0.033702,\n    \"samples_ns\": [ 186246982119, 183430321674, 187915050684 ],\n    \"samples_ts\": [ 2.74904, 2.79125, 2.72464 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:09:32Z",
+          "avg_ns": 9885792275,
+          "stddev_ns": 4331087,
+          "avg_ts": 12.947876,
+          "stddev_ts": 0.00567,
+          "samples_ns": [
+            9883493095,
+            9890786831,
+            9883096900
+          ],
+          "samples_ts": [
+            12.9509,
+            12.9413,
+            12.9514
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:10:12Z",
+          "avg_ns": 185864118159,
+          "stddev_ns": 2266745945,
+          "avg_ts": 2.754975,
+          "stddev_ts": 0.033702,
+          "samples_ns": [
+            186246982119,
+            183430321674,
+            187915050684
+          ],
+          "samples_ts": [
+            2.74904,
+            2.79125,
+            2.72464
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1113
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:24:34.271710+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:19:31Z\",\n    \"avg_ns\": 40801227708,\n    \"stddev_ns\": 17403332,\n    \"avg_ts\": 12.548644,\n    \"stddev_ts\": 0.005352,\n    \"samples_ns\": [ 40820096801, 40797780147, 40785806176 ],\n    \"samples_ts\": [ 12.5428, 12.5497, 12.5534 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:22:14Z\",\n    \"avg_ns\": 46550349194,\n    \"stddev_ns\": 28982504,\n    \"avg_ts\": 2.749712,\n    \"stddev_ts\": 0.001712,\n    \"samples_ns\": [ 46527407233, 46540719573, 46582920776 ],\n    \"samples_ts\": [ 2.75107, 2.75028, 2.74779 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:19:31Z",
+          "avg_ns": 40801227708,
+          "stddev_ns": 17403332,
+          "avg_ts": 12.548644,
+          "stddev_ts": 0.005352,
+          "samples_ns": [
+            40820096801,
+            40797780147,
+            40785806176
+          ],
+          "samples_ts": [
+            12.5428,
+            12.5497,
+            12.5534
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:22:14Z",
+          "avg_ns": 46550349194,
+          "stddev_ns": 28982504,
+          "avg_ts": 2.749712,
+          "stddev_ts": 0.001712,
+          "samples_ns": [
+            46527407233,
+            46540719573,
+            46582920776
+          ],
+          "samples_ts": [
+            2.75107,
+            2.75028,
+            2.74779
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1114
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:36:34.276975+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:24:35Z\",\n    \"avg_ns\": 40804554213,\n    \"stddev_ns\": 6644612,\n    \"avg_ts\": 12.547619,\n    \"stddev_ts\": 0.002043,\n    \"samples_ns\": [ 40811718329, 40798593613, 40803350697 ],\n    \"samples_ts\": [ 12.5454, 12.5495, 12.548 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:27:18Z\",\n    \"avg_ns\": 185055948106,\n    \"stddev_ns\": 281692700,\n    \"avg_ts\": 2.767234,\n    \"stddev_ts\": 0.045828,\n    \"samples_ns\": [ 187760161070, 185657849556, 181749833692 ],\n    \"samples_ts\": [ 2.72688, 2.75776, 2.81706 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:24:35Z",
+          "avg_ns": 40804554213,
+          "stddev_ns": 6644612,
+          "avg_ts": 12.547619,
+          "stddev_ts": 0.002043,
+          "samples_ns": [
+            40811718329,
+            40798593613,
+            40803350697
+          ],
+          "samples_ts": [
+            12.5454,
+            12.5495,
+            12.548
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:27:18Z",
+          "avg_ns": 185055948106,
+          "stddev_ns": 281692700,
+          "avg_ts": 2.767234,
+          "stddev_ts": 0.045828,
+          "samples_ns": [
+            187760161070,
+            185657849556,
+            181749833692
+          ],
+          "samples_ts": [
+            2.72688,
+            2.75776,
+            2.81706
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1115
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:39:36.726848+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:36:35Z\",\n    \"avg_ns\": 7647674754,\n    \"stddev_ns\": 10642507,\n    \"avg_ts\": 16.737135,\n    \"stddev_ts\": 0.023289,\n    \"samples_ns\": [ 7637144800, 7647454505, 7658424959 ],\n    \"samples_ts\": [ 16.7602, 16.7376, 16.7136 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:37:05Z\",\n    \"avg_ns\": 50079938593,\n    \"stddev_ns\": 55489413,\n    \"avg_ts\": 2.555916,\n    \"stddev_ts\": 0.002830,\n    \"samples_ns\": [ 50037847755, 50142819952, 50059148074 ],\n    \"samples_ts\": [ 2.55806, 2.55271, 2.55698 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:36:35Z",
+          "avg_ns": 7647674754,
+          "stddev_ns": 10642507,
+          "avg_ts": 16.737135,
+          "stddev_ts": 0.023289,
+          "samples_ns": [
+            7637144800,
+            7647454505,
+            7658424959
+          ],
+          "samples_ts": [
+            16.7602,
+            16.7376,
+            16.7136
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:37:05Z",
+          "avg_ns": 50079938593,
+          "stddev_ns": 55489413,
+          "avg_ts": 2.555916,
+          "stddev_ts": 0.00283,
+          "samples_ns": [
+            50037847755,
+            50142819952,
+            50059148074
+          ],
+          "samples_ts": [
+            2.55806,
+            2.55271,
+            2.55698
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1116
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:50:13.802394+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:39:37Z\",\n    \"avg_ns\": 7640483407,\n    \"stddev_ns\": 5301903,\n    \"avg_ts\": 16.752872,\n    \"stddev_ts\": 0.011628,\n    \"samples_ns\": [ 7634471286, 7644486134, 7642492802 ],\n    \"samples_ts\": [ 16.7661, 16.7441, 16.7485 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:40:08Z\",\n    \"avg_ns\": 201627549968,\n    \"stddev_ns\": 131796235,\n    \"avg_ts\": 2.539336,\n    \"stddev_ts\": 0.001660,\n    \"samples_ns\": [ 201606402298, 201507606257, 201768641349 ],\n    \"samples_ts\": [ 2.5396, 2.54085, 2.53756 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:39:37Z",
+          "avg_ns": 7640483407,
+          "stddev_ns": 5301903,
+          "avg_ts": 16.752872,
+          "stddev_ts": 0.011628,
+          "samples_ns": [
+            7634471286,
+            7644486134,
+            7642492802
+          ],
+          "samples_ts": [
+            16.7661,
+            16.7441,
+            16.7485
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:40:08Z",
+          "avg_ns": 201627549968,
+          "stddev_ns": 131796235,
+          "avg_ts": 2.539336,
+          "stddev_ts": 0.00166,
+          "samples_ns": [
+            201606402298,
+            201507606257,
+            201768641349
+          ],
+          "samples_ts": [
+            2.5396,
+            2.54085,
+            2.53756
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1117
+    },
+    {
+      "timestamp_utc": "2025-12-10T22:54:48.027246+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:50:14Z\",\n    \"avg_ns\": 30633156657,\n    \"stddev_ns\": 14851097,\n    \"avg_ts\": 16.713918,\n    \"stddev_ts\": 0.008102,\n    \"samples_ns\": [ 30631910740, 30648590431, 30618968801 ],\n    \"samples_ts\": [ 16.7146, 16.7055, 16.7217 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:52:17Z\",\n    \"avg_ns\": 50025953000,\n    \"stddev_ns\": 28811560,\n    \"avg_ts\": 2.558672,\n    \"stddev_ts\": 0.001473,\n    \"samples_ns\": [ 50000539437, 50057253305, 50020066258 ],\n    \"samples_ts\": [ 2.55997, 2.55707, 2.55897 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:50:14Z",
+          "avg_ns": 30633156657,
+          "stddev_ns": 14851097,
+          "avg_ts": 16.713918,
+          "stddev_ts": 0.008102,
+          "samples_ns": [
+            30631910740,
+            30648590431,
+            30618968801
+          ],
+          "samples_ts": [
+            16.7146,
+            16.7055,
+            16.7217
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:52:17Z",
+          "avg_ns": 50025953000,
+          "stddev_ns": 28811560,
+          "avg_ts": 2.558672,
+          "stddev_ts": 0.001473,
+          "samples_ns": [
+            50000539437,
+            50057253305,
+            50020066258
+          ],
+          "samples_ts": [
+            2.55997,
+            2.55707,
+            2.55897
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1118
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:06:55.543453+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:54:49Z\",\n    \"avg_ns\": 30656284673,\n    \"stddev_ns\": 19216663,\n    \"avg_ts\": 16.701311,\n    \"stddev_ts\": 0.010466,\n    \"samples_ns\": [ 30677971139, 30641372894, 30649509986 ],\n    \"samples_ts\": [ 16.6895, 16.7094, 16.705 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T22:56:51Z\",\n    \"avg_ns\": 201086688511,\n    \"stddev_ns\": 84695206,\n    \"avg_ts\": 2.546166,\n    \"stddev_ts\": 0.001072,\n    \"samples_ns\": [ 201022176186, 201055291733, 201182597615 ],\n    \"samples_ts\": [ 2.54698, 2.54656, 2.54495 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:54:49Z",
+          "avg_ns": 30656284673,
+          "stddev_ns": 19216663,
+          "avg_ts": 16.701311,
+          "stddev_ts": 0.010466,
+          "samples_ns": [
+            30677971139,
+            30641372894,
+            30649509986
+          ],
+          "samples_ts": [
+            16.6895,
+            16.7094,
+            16.705
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T22:56:51Z",
+          "avg_ns": 201086688511,
+          "stddev_ns": 84695206,
+          "avg_ts": 2.546166,
+          "stddev_ts": 0.001072,
+          "samples_ns": [
+            201022176186,
+            201055291733,
+            201182597615
+          ],
+          "samples_ts": [
+            2.54698,
+            2.54656,
+            2.54495
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1119
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:09:57.922256+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:06:56Z\",\n    \"avg_ns\": 7650567785,\n    \"stddev_ns\": 6652217,\n    \"avg_ts\": 16.730793,\n    \"stddev_ts\": 0.014547,\n    \"samples_ns\": [ 7657235561, 7650536558, 7643931236 ],\n    \"samples_ts\": [ 16.7162, 16.7309, 16.7453 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:07:27Z\",\n    \"avg_ns\": 50040387361,\n    \"stddev_ns\": 72093571,\n    \"avg_ts\": 2.557937,\n    \"stddev_ts\": 0.003685,\n    \"samples_ns\": [ 49970090018, 50036921326, 50114150741 ],\n    \"samples_ts\": [ 2.56153, 2.55811, 2.55417 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:06:56Z",
+          "avg_ns": 7650567785,
+          "stddev_ns": 6652217,
+          "avg_ts": 16.730793,
+          "stddev_ts": 0.014547,
+          "samples_ns": [
+            7657235561,
+            7650536558,
+            7643931236
+          ],
+          "samples_ts": [
+            16.7162,
+            16.7309,
+            16.7453
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:07:27Z",
+          "avg_ns": 50040387361,
+          "stddev_ns": 72093571,
+          "avg_ts": 2.557937,
+          "stddev_ts": 0.003685,
+          "samples_ns": [
+            49970090018,
+            50036921326,
+            50114150741
+          ],
+          "samples_ts": [
+            2.56153,
+            2.55811,
+            2.55417
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1120
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:20:33.424282+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:09:59Z\",\n    \"avg_ns\": 7633439262,\n    \"stddev_ns\": 8799326,\n    \"avg_ts\": 16.768341,\n    \"stddev_ts\": 0.019315,\n    \"samples_ns\": [ 7628527538, 7643597014, 7628193236 ],\n    \"samples_ts\": [ 16.7791, 16.746, 16.7799 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:10:29Z\",\n    \"avg_ns\": 201102784385,\n    \"stddev_ns\": 58421945,\n    \"avg_ts\": 2.545962,\n    \"stddev_ts\": 0.000740,\n    \"samples_ns\": [ 201162968929, 201099075575, 201046308653 ],\n    \"samples_ts\": [ 2.5452, 2.54601, 2.54668 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:09:59Z",
+          "avg_ns": 7633439262,
+          "stddev_ns": 8799326,
+          "avg_ts": 16.768341,
+          "stddev_ts": 0.019315,
+          "samples_ns": [
+            7628527538,
+            7643597014,
+            7628193236
+          ],
+          "samples_ts": [
+            16.7791,
+            16.746,
+            16.7799
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:10:29Z",
+          "avg_ns": 201102784385,
+          "stddev_ns": 58421945,
+          "avg_ts": 2.545962,
+          "stddev_ts": 0.00074,
+          "samples_ns": [
+            201162968929,
+            201099075575,
+            201046308653
+          ],
+          "samples_ts": [
+            2.5452,
+            2.54601,
+            2.54668
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1121
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:25:08.181170+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:20:34Z\",\n    \"avg_ns\": 30812739799,\n    \"stddev_ns\": 53754060,\n    \"avg_ts\": 16.616537,\n    \"stddev_ts\": 0.028960,\n    \"samples_ns\": [ 30874491591, 30776429186, 30787298620 ],\n    \"samples_ts\": [ 16.5833, 16.6361, 16.6302 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:22:37Z\",\n    \"avg_ns\": 49956697832,\n    \"stddev_ns\": 44185311,\n    \"avg_ts\": 2.562220,\n    \"stddev_ts\": 0.002265,\n    \"samples_ns\": [ 49932294896, 50007701548, 49930097054 ],\n    \"samples_ts\": [ 2.56347, 2.55961, 2.56358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:20:34Z",
+          "avg_ns": 30812739799,
+          "stddev_ns": 53754060,
+          "avg_ts": 16.616537,
+          "stddev_ts": 0.02896,
+          "samples_ns": [
+            30874491591,
+            30776429186,
+            30787298620
+          ],
+          "samples_ts": [
+            16.5833,
+            16.6361,
+            16.6302
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:22:37Z",
+          "avg_ns": 49956697832,
+          "stddev_ns": 44185311,
+          "avg_ts": 2.56222,
+          "stddev_ts": 0.002265,
+          "samples_ns": [
+            49932294896,
+            50007701548,
+            49930097054
+          ],
+          "samples_ts": [
+            2.56347,
+            2.55961,
+            2.56358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1122
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:37:15.815469+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:25:09Z\",\n    \"avg_ns\": 30877004354,\n    \"stddev_ns\": 5152757,\n    \"avg_ts\": 16.581920,\n    \"stddev_ts\": 0.002764,\n    \"samples_ns\": [ 30882616354, 30872504764, 30875891946 ],\n    \"samples_ts\": [ 16.5789, 16.5843, 16.5825 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:27:12Z\",\n    \"avg_ns\": 200805118624,\n    \"stddev_ns\": 84987439,\n    \"avg_ts\": 2.549736,\n    \"stddev_ts\": 0.001079,\n    \"samples_ns\": [ 200714943607, 200816679819, 200883732447 ],\n    \"samples_ts\": [ 2.55088, 2.54959, 2.54874 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:25:09Z",
+          "avg_ns": 30877004354,
+          "stddev_ns": 5152757,
+          "avg_ts": 16.58192,
+          "stddev_ts": 0.002764,
+          "samples_ns": [
+            30882616354,
+            30872504764,
+            30875891946
+          ],
+          "samples_ts": [
+            16.5789,
+            16.5843,
+            16.5825
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:27:12Z",
+          "avg_ns": 200805118624,
+          "stddev_ns": 84987439,
+          "avg_ts": 2.549736,
+          "stddev_ts": 0.001079,
+          "samples_ns": [
+            200714943607,
+            200816679819,
+            200883732447
+          ],
+          "samples_ts": [
+            2.55088,
+            2.54959,
+            2.54874
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1123
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:40:18.076874+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:37:16Z\",\n    \"avg_ns\": 7634413459,\n    \"stddev_ns\": 2260235,\n    \"avg_ts\": 16.766187,\n    \"stddev_ts\": 0.004960,\n    \"samples_ns\": [ 7636705387, 7632189844, 7634345147 ],\n    \"samples_ts\": [ 16.7612, 16.7711, 16.7663 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:37:47Z\",\n    \"avg_ns\": 50017485523,\n    \"stddev_ns\": 16238458,\n    \"avg_ts\": 2.559105,\n    \"stddev_ts\": 0.000831,\n    \"samples_ns\": [ 50004005107, 50035508297, 50012943167 ],\n    \"samples_ts\": [ 2.55979, 2.55818, 2.55934 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:37:16Z",
+          "avg_ns": 7634413459,
+          "stddev_ns": 2260235,
+          "avg_ts": 16.766187,
+          "stddev_ts": 0.00496,
+          "samples_ns": [
+            7636705387,
+            7632189844,
+            7634345147
+          ],
+          "samples_ts": [
+            16.7612,
+            16.7711,
+            16.7663
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:37:47Z",
+          "avg_ns": 50017485523,
+          "stddev_ns": 16238458,
+          "avg_ts": 2.559105,
+          "stddev_ts": 0.000831,
+          "samples_ns": [
+            50004005107,
+            50035508297,
+            50012943167
+          ],
+          "samples_ts": [
+            2.55979,
+            2.55818,
+            2.55934
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1124
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:50:53.034116+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:40:19Z\",\n    \"avg_ns\": 7635763119,\n    \"stddev_ns\": 5702123,\n    \"avg_ts\": 16.763229,\n    \"stddev_ts\": 0.012515,\n    \"samples_ns\": [ 7641437728, 7635815116, 7630036515 ],\n    \"samples_ts\": [ 16.7508, 16.7631, 16.7758 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:40:49Z\",\n    \"avg_ns\": 200919262337,\n    \"stddev_ns\": 88549722,\n    \"avg_ts\": 2.548288,\n    \"stddev_ts\": 0.001123,\n    \"samples_ns\": [ 200842021527, 200899865121, 201015900365 ],\n    \"samples_ts\": [ 2.54927, 2.54853, 2.54706 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:40:19Z",
+          "avg_ns": 7635763119,
+          "stddev_ns": 5702123,
+          "avg_ts": 16.763229,
+          "stddev_ts": 0.012515,
+          "samples_ns": [
+            7641437728,
+            7635815116,
+            7630036515
+          ],
+          "samples_ts": [
+            16.7508,
+            16.7631,
+            16.7758
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:40:49Z",
+          "avg_ns": 200919262337,
+          "stddev_ns": 88549722,
+          "avg_ts": 2.548288,
+          "stddev_ts": 0.001123,
+          "samples_ns": [
+            200842021527,
+            200899865121,
+            201015900365
+          ],
+          "samples_ts": [
+            2.54927,
+            2.54853,
+            2.54706
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1125
+    },
+    {
+      "timestamp_utc": "2025-12-10T23:55:30.414900+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:50:54Z\",\n    \"avg_ns\": 31475697300,\n    \"stddev_ns\": 137718358,\n    \"avg_ts\": 16.266726,\n    \"stddev_ts\": 0.071142,\n    \"samples_ns\": [ 31342979170, 31466189993, 31617922739 ],\n    \"samples_ts\": [ 16.3354, 16.2714, 16.1933 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:52:59Z\",\n    \"avg_ns\": 49971199482,\n    \"stddev_ns\": 13394274,\n    \"avg_ts\": 2.561476,\n    \"stddev_ts\": 0.000686,\n    \"samples_ns\": [ 49956140823, 49975690024, 49981767601 ],\n    \"samples_ts\": [ 2.56225, 2.56125, 2.56093 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:50:54Z",
+          "avg_ns": 31475697300,
+          "stddev_ns": 137718358,
+          "avg_ts": 16.266726,
+          "stddev_ts": 0.071142,
+          "samples_ns": [
+            31342979170,
+            31466189993,
+            31617922739
+          ],
+          "samples_ts": [
+            16.3354,
+            16.2714,
+            16.1933
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:52:59Z",
+          "avg_ns": 49971199482,
+          "stddev_ns": 13394274,
+          "avg_ts": 2.561476,
+          "stddev_ts": 0.000686,
+          "samples_ns": [
+            49956140823,
+            49975690024,
+            49981767601
+          ],
+          "samples_ts": [
+            2.56225,
+            2.56125,
+            2.56093
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1126
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:07:39.988346+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:55:31Z\",\n    \"avg_ns\": 31529998624,\n    \"stddev_ns\": 158279010,\n    \"avg_ts\": 16.238777,\n    \"stddev_ts\": 0.081685,\n    \"samples_ns\": [ 31353507177, 31577130820, 31659357875 ],\n    \"samples_ts\": [ 16.3299, 16.2143, 16.1722 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-10T23:57:37Z\",\n    \"avg_ns\": 200650068054,\n    \"stddev_ns\": 735301058,\n    \"avg_ts\": 2.551729,\n    \"stddev_ts\": 0.009361,\n    \"samples_ns\": [ 201302988081, 200793649768, 199853566313 ],\n    \"samples_ts\": [ 2.54343, 2.54988, 2.56188 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:55:31Z",
+          "avg_ns": 31529998624,
+          "stddev_ns": 158279010,
+          "avg_ts": 16.238777,
+          "stddev_ts": 0.081685,
+          "samples_ns": [
+            31353507177,
+            31577130820,
+            31659357875
+          ],
+          "samples_ts": [
+            16.3299,
+            16.2143,
+            16.1722
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-10T23:57:37Z",
+          "avg_ns": 200650068054,
+          "stddev_ns": 735301058,
+          "avg_ts": 2.551729,
+          "stddev_ts": 0.009361,
+          "samples_ns": [
+            201302988081,
+            200793649768,
+            199853566313
+          ],
+          "samples_ts": [
+            2.54343,
+            2.54988,
+            2.56188
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1127
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:10:42.086576+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:07:41Z\",\n    \"avg_ns\": 7638830472,\n    \"stddev_ns\": 7804236,\n    \"avg_ts\": 16.756503,\n    \"stddev_ts\": 0.017126,\n    \"samples_ns\": [ 7629929657, 7642067436, 7644494325 ],\n    \"samples_ts\": [ 16.776, 16.7494, 16.7441 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:08:11Z\",\n    \"avg_ns\": 49971484449,\n    \"stddev_ns\": 97252139,\n    \"avg_ts\": 2.561467,\n    \"stddev_ts\": 0.004988,\n    \"samples_ns\": [ 49865366318, 49992730227, 50056356804 ],\n    \"samples_ts\": [ 2.56691, 2.56037, 2.55712 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:07:41Z",
+          "avg_ns": 7638830472,
+          "stddev_ns": 7804236,
+          "avg_ts": 16.756503,
+          "stddev_ts": 0.017126,
+          "samples_ns": [
+            7629929657,
+            7642067436,
+            7644494325
+          ],
+          "samples_ts": [
+            16.776,
+            16.7494,
+            16.7441
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:08:11Z",
+          "avg_ns": 49971484449,
+          "stddev_ns": 97252139,
+          "avg_ts": 2.561467,
+          "stddev_ts": 0.004988,
+          "samples_ns": [
+            49865366318,
+            49992730227,
+            50056356804
+          ],
+          "samples_ts": [
+            2.56691,
+            2.56037,
+            2.55712
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1128
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:21:17.088402+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:10:43Z\",\n    \"avg_ns\": 7630998725,\n    \"stddev_ns\": 3068989,\n    \"avg_ts\": 16.773691,\n    \"stddev_ts\": 0.006745,\n    \"samples_ns\": [ 7632286806, 7633212429, 7627496941 ],\n    \"samples_ts\": [ 16.7709, 16.7688, 16.7814 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:11:13Z\",\n    \"avg_ns\": 200941842162,\n    \"stddev_ns\": 91946281,\n    \"avg_ts\": 2.548001,\n    \"stddev_ts\": 0.001166,\n    \"samples_ns\": [ 200883032774, 200894696157, 201047797556 ],\n    \"samples_ts\": [ 2.54875, 2.5486, 2.54666 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:10:43Z",
+          "avg_ns": 7630998725,
+          "stddev_ns": 3068989,
+          "avg_ts": 16.773691,
+          "stddev_ts": 0.006745,
+          "samples_ns": [
+            7632286806,
+            7633212429,
+            7627496941
+          ],
+          "samples_ts": [
+            16.7709,
+            16.7688,
+            16.7814
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:11:13Z",
+          "avg_ns": 200941842162,
+          "stddev_ns": 91946281,
+          "avg_ts": 2.548001,
+          "stddev_ts": 0.001166,
+          "samples_ns": [
+            200883032774,
+            200894696157,
+            201047797556
+          ],
+          "samples_ts": [
+            2.54875,
+            2.5486,
+            2.54666
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1129
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:25:51.136984+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:21:18Z\",\n    \"avg_ns\": 30635815542,\n    \"stddev_ns\": 21618650,\n    \"avg_ts\": 16.712471,\n    \"stddev_ts\": 0.011797,\n    \"samples_ns\": [ 30648951830, 30647629000, 30610865798 ],\n    \"samples_ts\": [ 16.7053, 16.706, 16.7261 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:23:20Z\",\n    \"avg_ns\": 49941262458,\n    \"stddev_ns\": 35347156,\n    \"avg_ts\": 2.563012,\n    \"stddev_ts\": 0.001815,\n    \"samples_ns\": [ 49901802670, 49970023708, 49951960997 ],\n    \"samples_ts\": [ 2.56504, 2.56154, 2.56246 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:21:18Z",
+          "avg_ns": 30635815542,
+          "stddev_ns": 21618650,
+          "avg_ts": 16.712471,
+          "stddev_ts": 0.011797,
+          "samples_ns": [
+            30648951830,
+            30647629000,
+            30610865798
+          ],
+          "samples_ts": [
+            16.7053,
+            16.706,
+            16.7261
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:23:20Z",
+          "avg_ns": 49941262458,
+          "stddev_ns": 35347156,
+          "avg_ts": 2.563012,
+          "stddev_ts": 0.001815,
+          "samples_ns": [
+            49901802670,
+            49970023708,
+            49951960997
+          ],
+          "samples_ts": [
+            2.56504,
+            2.56154,
+            2.56246
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1130
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:37:37.542340+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:25:52Z\",\n    \"avg_ns\": 30584258390,\n    \"stddev_ns\": 4609355,\n    \"avg_ts\": 16.740638,\n    \"stddev_ts\": 0.002523,\n    \"samples_ns\": [ 30588539333, 30584856769, 30579379068 ],\n    \"samples_ts\": [ 16.7383, 16.7403, 16.7433 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:27:54Z\",\n    \"avg_ns\": 194139920981,\n    \"stddev_ns\": 1397297804,\n    \"avg_ts\": 2.637364,\n    \"stddev_ts\": 0.018906,\n    \"samples_ns\": [ 195748394946, 193225912497, 193445455500 ],\n    \"samples_ts\": [ 2.6156, 2.64975, 2.64674 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:25:52Z",
+          "avg_ns": 30584258390,
+          "stddev_ns": 4609355,
+          "avg_ts": 16.740638,
+          "stddev_ts": 0.002523,
+          "samples_ns": [
+            30588539333,
+            30584856769,
+            30579379068
+          ],
+          "samples_ts": [
+            16.7383,
+            16.7403,
+            16.7433
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:27:54Z",
+          "avg_ns": 194139920981,
+          "stddev_ns": 1397297804,
+          "avg_ts": 2.637364,
+          "stddev_ts": 0.018906,
+          "samples_ns": [
+            195748394946,
+            193225912497,
+            193445455500
+          ],
+          "samples_ts": [
+            2.6156,
+            2.64975,
+            2.64674
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1131
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:40:38.575941+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:37:38Z\",\n    \"avg_ns\": 7646694218,\n    \"stddev_ns\": 3795546,\n    \"avg_ts\": 16.739262,\n    \"stddev_ts\": 0.008308,\n    \"samples_ns\": [ 7650031472, 7647484367, 7642566816 ],\n    \"samples_ts\": [ 16.732, 16.7375, 16.7483 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:38:09Z\",\n    \"avg_ns\": 49607841184,\n    \"stddev_ns\": 473591824,\n    \"avg_ts\": 2.580395,\n    \"stddev_ts\": 0.024768,\n    \"samples_ns\": [ 49848489412, 49912788090, 49062246052 ],\n    \"samples_ts\": [ 2.56778, 2.56447, 2.60893 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:37:38Z",
+          "avg_ns": 7646694218,
+          "stddev_ns": 3795546,
+          "avg_ts": 16.739262,
+          "stddev_ts": 0.008308,
+          "samples_ns": [
+            7650031472,
+            7647484367,
+            7642566816
+          ],
+          "samples_ts": [
+            16.732,
+            16.7375,
+            16.7483
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:38:09Z",
+          "avg_ns": 49607841184,
+          "stddev_ns": 473591824,
+          "avg_ts": 2.580395,
+          "stddev_ts": 0.024768,
+          "samples_ns": [
+            49848489412,
+            49912788090,
+            49062246052
+          ],
+          "samples_ts": [
+            2.56778,
+            2.56447,
+            2.60893
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1132
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:51:05.570143+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:40:39Z\",\n    \"avg_ns\": 7628493364,\n    \"stddev_ns\": 1848475,\n    \"avg_ts\": 16.779199,\n    \"stddev_ts\": 0.004057,\n    \"samples_ns\": [ 7629706977, 7626370985, 7629402132 ],\n    \"samples_ts\": [ 16.7765, 16.7839, 16.7772 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:41:10Z\",\n    \"avg_ns\": 198278500906,\n    \"stddev_ns\": 3855045812,\n    \"avg_ts\": 2.582472,\n    \"stddev_ts\": 0.030745,\n    \"samples_ns\": [ 200974207774, 197364097755, 196497197191 ],\n    \"samples_ts\": [ 2.54759, 2.59419, 2.60564 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:40:39Z",
+          "avg_ns": 7628493364,
+          "stddev_ns": 1848475,
+          "avg_ts": 16.779199,
+          "stddev_ts": 0.004057,
+          "samples_ns": [
+            7629706977,
+            7626370985,
+            7629402132
+          ],
+          "samples_ts": [
+            16.7765,
+            16.7839,
+            16.7772
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:41:10Z",
+          "avg_ns": 198278500906,
+          "stddev_ns": 3855045812,
+          "avg_ts": 2.582472,
+          "stddev_ts": 0.030745,
+          "samples_ns": [
+            200974207774,
+            197364097755,
+            196497197191
+          ],
+          "samples_ts": [
+            2.54759,
+            2.59419,
+            2.60564
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1133
+    },
+    {
+      "timestamp_utc": "2025-12-11T00:55:39.167054+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:51:06Z\",\n    \"avg_ns\": 30785894986,\n    \"stddev_ns\": 8588980,\n    \"avg_ts\": 16.630994,\n    \"stddev_ts\": 0.004639,\n    \"samples_ns\": [ 30792721766, 30776253747, 30788709446 ],\n    \"samples_ts\": [ 16.6273, 16.6362, 16.6295 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:53:09Z\",\n    \"avg_ns\": 49595050861,\n    \"stddev_ns\": 538709562,\n    \"avg_ts\": 2.581107,\n    \"stddev_ts\": 0.028213,\n    \"samples_ns\": [ 49901399059, 49910727516, 48973026009 ],\n    \"samples_ts\": [ 2.56506, 2.56458, 2.61368 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:51:06Z",
+          "avg_ns": 30785894986,
+          "stddev_ns": 8588980,
+          "avg_ts": 16.630994,
+          "stddev_ts": 0.004639,
+          "samples_ns": [
+            30792721766,
+            30776253747,
+            30788709446
+          ],
+          "samples_ts": [
+            16.6273,
+            16.6362,
+            16.6295
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:53:09Z",
+          "avg_ns": 49595050861,
+          "stddev_ns": 538709562,
+          "avg_ts": 2.581107,
+          "stddev_ts": 0.028213,
+          "samples_ns": [
+            49901399059,
+            49910727516,
+            48973026009
+          ],
+          "samples_ts": [
+            2.56506,
+            2.56458,
+            2.61368
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1134
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:07:30.165309+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:55:40Z\",\n    \"avg_ns\": 30764043750,\n    \"stddev_ns\": 8579008,\n    \"avg_ts\": 16.642806,\n    \"stddev_ts\": 0.004640,\n    \"samples_ns\": [ 30762400125, 30773323843, 30756407283 ],\n    \"samples_ts\": [ 16.6437, 16.6378, 16.6469 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T00:57:43Z\",\n    \"avg_ns\": 195392272297,\n    \"stddev_ns\": 3980101918,\n    \"avg_ts\": 2.621086,\n    \"stddev_ts\": 0.052773,\n    \"samples_ns\": [ 199985737669, 193223086428, 192967992794 ],\n    \"samples_ts\": [ 2.56018, 2.64979, 2.65329 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:55:40Z",
+          "avg_ns": 30764043750,
+          "stddev_ns": 8579008,
+          "avg_ts": 16.642806,
+          "stddev_ts": 0.00464,
+          "samples_ns": [
+            30762400125,
+            30773323843,
+            30756407283
+          ],
+          "samples_ts": [
+            16.6437,
+            16.6378,
+            16.6469
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T00:57:43Z",
+          "avg_ns": 195392272297,
+          "stddev_ns": 3980101918,
+          "avg_ts": 2.621086,
+          "stddev_ts": 0.052773,
+          "samples_ns": [
+            199985737669,
+            193223086428,
+            192967992794
+          ],
+          "samples_ts": [
+            2.56018,
+            2.64979,
+            2.65329
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1135
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:10:29.861548+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:07:31Z\",\n    \"avg_ns\": 7634091298,\n    \"stddev_ns\": 6961445,\n    \"avg_ts\": 16.766903,\n    \"stddev_ts\": 0.015294,\n    \"samples_ns\": [ 7626134769, 7639052760, 7637086367 ],\n    \"samples_ts\": [ 16.7844, 16.756, 16.7603 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:08:01Z\",\n    \"avg_ns\": 49168462158,\n    \"stddev_ns\": 1044574448,\n    \"avg_ts\": 2.604088,\n    \"stddev_ts\": 0.056007,\n    \"samples_ns\": [ 49805695715, 49736742117, 47962948642 ],\n    \"samples_ts\": [ 2.56999, 2.57355, 2.66873 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:07:31Z",
+          "avg_ns": 7634091298,
+          "stddev_ns": 6961445,
+          "avg_ts": 16.766903,
+          "stddev_ts": 0.015294,
+          "samples_ns": [
+            7626134769,
+            7639052760,
+            7637086367
+          ],
+          "samples_ts": [
+            16.7844,
+            16.756,
+            16.7603
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:08:01Z",
+          "avg_ns": 49168462158,
+          "stddev_ns": 1044574448,
+          "avg_ts": 2.604088,
+          "stddev_ts": 0.056007,
+          "samples_ns": [
+            49805695715,
+            49736742117,
+            47962948642
+          ],
+          "samples_ts": [
+            2.56999,
+            2.57355,
+            2.66873
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1136
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:20:46.010267+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:10:30Z\",\n    \"avg_ns\": 7630301112,\n    \"stddev_ns\": 3098051,\n    \"avg_ts\": 16.775224,\n    \"stddev_ts\": 0.006806,\n    \"samples_ns\": [ 7633132174, 7630775419, 7626995745 ],\n    \"samples_ts\": [ 16.769, 16.7742, 16.7825 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:11:01Z\",\n    \"avg_ns\": 194662457468,\n    \"stddev_ns\": 3810917266,\n    \"avg_ts\": 2.630437,\n    \"stddev_ts\": 0.030897,\n    \"samples_ns\": [ 197320705433, 193330821936, 193335845037 ],\n    \"samples_ts\": [ 2.59476, 2.64831, 2.64824 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:10:30Z",
+          "avg_ns": 7630301112,
+          "stddev_ns": 3098051,
+          "avg_ts": 16.775224,
+          "stddev_ts": 0.006806,
+          "samples_ns": [
+            7633132174,
+            7630775419,
+            7626995745
+          ],
+          "samples_ts": [
+            16.769,
+            16.7742,
+            16.7825
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:11:01Z",
+          "avg_ns": 194662457468,
+          "stddev_ns": 3810917266,
+          "avg_ts": 2.630437,
+          "stddev_ts": 0.030897,
+          "samples_ns": [
+            197320705433,
+            193330821936,
+            193335845037
+          ],
+          "samples_ts": [
+            2.59476,
+            2.64831,
+            2.64824
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1137
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:25:21.282228+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:20:47Z\",\n    \"avg_ns\": 31374152511,\n    \"stddev_ns\": 19142770,\n    \"avg_ts\": 16.319170,\n    \"stddev_ts\": 0.009956,\n    \"samples_ns\": [ 31372198935, 31394197160, 31356061438 ],\n    \"samples_ts\": [ 16.3202, 16.3087, 16.3286 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:22:52Z\",\n    \"avg_ns\": 49384173690,\n    \"stddev_ns\": 1004817551,\n    \"avg_ts\": 2.592648,\n    \"stddev_ts\": 0.053380,\n    \"samples_ns\": [ 49960362651, 49968239130, 48223919291 ],\n    \"samples_ts\": [ 2.56203, 2.56163, 2.65428 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:20:47Z",
+          "avg_ns": 31374152511,
+          "stddev_ns": 19142770,
+          "avg_ts": 16.31917,
+          "stddev_ts": 0.009956,
+          "samples_ns": [
+            31372198935,
+            31394197160,
+            31356061438
+          ],
+          "samples_ts": [
+            16.3202,
+            16.3087,
+            16.3286
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:22:52Z",
+          "avg_ns": 49384173690,
+          "stddev_ns": 1004817551,
+          "avg_ts": 2.592648,
+          "stddev_ts": 0.05338,
+          "samples_ns": [
+            49960362651,
+            49968239130,
+            48223919291
+          ],
+          "samples_ts": [
+            2.56203,
+            2.56163,
+            2.65428
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1138
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:37:12.683532+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:25:22Z\",\n    \"avg_ns\": 31393662959,\n    \"stddev_ns\": 8732379,\n    \"avg_ts\": 16.309025,\n    \"stddev_ts\": 0.004536,\n    \"samples_ns\": [ 31398235478, 31383595819, 31399157581 ],\n    \"samples_ts\": [ 16.3066, 16.3143, 16.3062 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:27:28Z\",\n    \"avg_ns\": 194709982048,\n    \"stddev_ns\": 3777504221,\n    \"avg_ts\": 2.629784,\n    \"stddev_ts\": 0.030145,\n    \"samples_ns\": [ 197295004763, 193602968076, 193231973305 ],\n    \"samples_ts\": [ 2.5951, 2.64459, 2.64967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:25:22Z",
+          "avg_ns": 31393662959,
+          "stddev_ns": 8732379,
+          "avg_ts": 16.309025,
+          "stddev_ts": 0.004536,
+          "samples_ns": [
+            31398235478,
+            31383595819,
+            31399157581
+          ],
+          "samples_ts": [
+            16.3066,
+            16.3143,
+            16.3062
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:27:28Z",
+          "avg_ns": 194709982048,
+          "stddev_ns": 3777504221,
+          "avg_ts": 2.629784,
+          "stddev_ts": 0.030145,
+          "samples_ns": [
+            197295004763,
+            193602968076,
+            193231973305
+          ],
+          "samples_ts": [
+            2.5951,
+            2.64459,
+            2.64967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1139
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:40:10.167752+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:37:13Z\",\n    \"avg_ns\": 7633997301,\n    \"stddev_ns\": 5076031,\n    \"avg_ts\": 16.767105,\n    \"stddev_ts\": 0.011153,\n    \"samples_ns\": [ 7628227553, 7635988527, 7637775823 ],\n    \"samples_ts\": [ 16.7798, 16.7627, 16.7588 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:37:44Z\",\n    \"avg_ns\": 48430790075,\n    \"stddev_ns\": 692738499,\n    \"avg_ts\": 2.643304,\n    \"stddev_ts\": 0.037500,\n    \"samples_ns\": [ 49230634526, 48022310911, 48039424789 ],\n    \"samples_ts\": [ 2.60001, 2.66543, 2.66448 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:37:13Z",
+          "avg_ns": 7633997301,
+          "stddev_ns": 5076031,
+          "avg_ts": 16.767105,
+          "stddev_ts": 0.011153,
+          "samples_ns": [
+            7628227553,
+            7635988527,
+            7637775823
+          ],
+          "samples_ts": [
+            16.7798,
+            16.7627,
+            16.7588
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:37:44Z",
+          "avg_ns": 48430790075,
+          "stddev_ns": 692738499,
+          "avg_ts": 2.643304,
+          "stddev_ts": 0.0375,
+          "samples_ns": [
+            49230634526,
+            48022310911,
+            48039424789
+          ],
+          "samples_ts": [
+            2.60001,
+            2.66543,
+            2.66448
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1140
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:50:25.681683+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:40:11Z\",\n    \"avg_ns\": 7633639701,\n    \"stddev_ns\": 7283283,\n    \"avg_ts\": 16.767896,\n    \"stddev_ts\": 0.015998,\n    \"samples_ns\": [ 7641009262, 7633463965, 7626445876 ],\n    \"samples_ts\": [ 16.7517, 16.7683, 16.7837 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:40:41Z\",\n    \"avg_ns\": 194445498887,\n    \"stddev_ns\": 3329484067,\n    \"avg_ts\": 2.633215,\n    \"stddev_ts\": 0.018408,\n    \"samples_ns\": [ 196014076463, 193531729538, 193790690661 ],\n    \"samples_ts\": [ 2.61206, 2.64556, 2.64203 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:40:11Z",
+          "avg_ns": 7633639701,
+          "stddev_ns": 7283283,
+          "avg_ts": 16.767896,
+          "stddev_ts": 0.015998,
+          "samples_ns": [
+            7641009262,
+            7633463965,
+            7626445876
+          ],
+          "samples_ts": [
+            16.7517,
+            16.7683,
+            16.7837
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:40:41Z",
+          "avg_ns": 194445498887,
+          "stddev_ns": 3329484067,
+          "avg_ts": 2.633215,
+          "stddev_ts": 0.018408,
+          "samples_ns": [
+            196014076463,
+            193531729538,
+            193790690661
+          ],
+          "samples_ts": [
+            2.61206,
+            2.64556,
+            2.64203
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1141
+    },
+    {
+      "timestamp_utc": "2025-12-11T01:54:54.988121+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:50:26Z\",\n    \"avg_ns\": 30638983936,\n    \"stddev_ns\": 36177203,\n    \"avg_ts\": 16.710752,\n    \"stddev_ts\": 0.019744,\n    \"samples_ns\": [ 30661663770, 30658024577, 30597263462 ],\n    \"samples_ts\": [ 16.6984, 16.7004, 16.7335 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:52:29Z\",\n    \"avg_ns\": 48382254912,\n    \"stddev_ns\": 483320706,\n    \"avg_ts\": 2.645773,\n    \"stddev_ts\": 0.026279,\n    \"samples_ns\": [ 48940339423, 48100950731, 48105474583 ],\n    \"samples_ts\": [ 2.61543, 2.66107, 2.66082 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:50:26Z",
+          "avg_ns": 30638983936,
+          "stddev_ns": 36177203,
+          "avg_ts": 16.710752,
+          "stddev_ts": 0.019744,
+          "samples_ns": [
+            30661663770,
+            30658024577,
+            30597263462
+          ],
+          "samples_ts": [
+            16.6984,
+            16.7004,
+            16.7335
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:52:29Z",
+          "avg_ns": 48382254912,
+          "stddev_ns": 483320706,
+          "avg_ts": 2.645773,
+          "stddev_ts": 0.026279,
+          "samples_ns": [
+            48940339423,
+            48100950731,
+            48105474583
+          ],
+          "samples_ts": [
+            2.61543,
+            2.66107,
+            2.66082
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1142
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:06:38.509664+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:54:56Z\",\n    \"avg_ns\": 30573452633,\n    \"stddev_ns\": 19259208,\n    \"avg_ts\": 16.746559,\n    \"stddev_ts\": 0.010547,\n    \"samples_ns\": [ 30594889232, 30567859884, 30557608783 ],\n    \"samples_ts\": [ 16.7348, 16.7496, 16.7552 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T01:56:58Z\",\n    \"avg_ns\": 193187950669,\n    \"stddev_ns\": 99988687,\n    \"avg_ts\": 2.650269,\n    \"stddev_ts\": 0.001372,\n    \"samples_ns\": [ 193095284665, 193293928423, 193174638919 ],\n    \"samples_ts\": [ 2.65154, 2.64882, 2.65045 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:54:56Z",
+          "avg_ns": 30573452633,
+          "stddev_ns": 19259208,
+          "avg_ts": 16.746559,
+          "stddev_ts": 0.010547,
+          "samples_ns": [
+            30594889232,
+            30567859884,
+            30557608783
+          ],
+          "samples_ts": [
+            16.7348,
+            16.7496,
+            16.7552
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T01:56:58Z",
+          "avg_ns": 193187950669,
+          "stddev_ns": 99988687,
+          "avg_ts": 2.650269,
+          "stddev_ts": 0.001372,
+          "samples_ns": [
+            193095284665,
+            193293928423,
+            193174638919
+          ],
+          "samples_ts": [
+            2.65154,
+            2.64882,
+            2.65045
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1143
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:09:34.921695+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:06:39Z\",\n    \"avg_ns\": 7642833359,\n    \"stddev_ns\": 11728919,\n    \"avg_ts\": 16.747742,\n    \"stddev_ts\": 0.025705,\n    \"samples_ns\": [ 7654194617, 7643537001, 7630768459 ],\n    \"samples_ts\": [ 16.7229, 16.7462, 16.7742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:07:10Z\",\n    \"avg_ns\": 48069709100,\n    \"stddev_ns\": 51861608,\n    \"avg_ts\": 2.662802,\n    \"stddev_ts\": 0.002874,\n    \"samples_ns\": [ 48114729848, 48081395620, 48013001833 ],\n    \"samples_ts\": [ 2.66031, 2.66215, 2.66594 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:06:39Z",
+          "avg_ns": 7642833359,
+          "stddev_ns": 11728919,
+          "avg_ts": 16.747742,
+          "stddev_ts": 0.025705,
+          "samples_ns": [
+            7654194617,
+            7643537001,
+            7630768459
+          ],
+          "samples_ts": [
+            16.7229,
+            16.7462,
+            16.7742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:07:10Z",
+          "avg_ns": 48069709100,
+          "stddev_ns": 51861608,
+          "avg_ts": 2.662802,
+          "stddev_ts": 0.002874,
+          "samples_ns": [
+            48114729848,
+            48081395620,
+            48013001833
+          ],
+          "samples_ts": [
+            2.66031,
+            2.66215,
+            2.66594
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1144
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:19:47.531948+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:09:36Z\",\n    \"avg_ns\": 7632553576,\n    \"stddev_ns\": 3206856,\n    \"avg_ts\": 16.770274,\n    \"stddev_ts\": 0.007045,\n    \"samples_ns\": [ 7636170132, 7631434006, 7630056590 ],\n    \"samples_ts\": [ 16.7623, 16.7727, 16.7758 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:10:06Z\",\n    \"avg_ns\": 193471997449,\n    \"stddev_ns\": 38672581,\n    \"avg_ts\": 2.646378,\n    \"stddev_ts\": 0.000529,\n    \"samples_ns\": [ 193448115302, 193516609824, 193451267223 ],\n    \"samples_ts\": [ 2.6467, 2.64577, 2.64666 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:09:36Z",
+          "avg_ns": 7632553576,
+          "stddev_ns": 3206856,
+          "avg_ts": 16.770274,
+          "stddev_ts": 0.007045,
+          "samples_ns": [
+            7636170132,
+            7631434006,
+            7630056590
+          ],
+          "samples_ts": [
+            16.7623,
+            16.7727,
+            16.7758
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:10:06Z",
+          "avg_ns": 193471997449,
+          "stddev_ns": 38672581,
+          "avg_ts": 2.646378,
+          "stddev_ts": 0.000529,
+          "samples_ns": [
+            193448115302,
+            193516609824,
+            193451267223
+          ],
+          "samples_ts": [
+            2.6467,
+            2.64577,
+            2.64666
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1145
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:24:16.221904+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:19:48Z\",\n    \"avg_ns\": 30755652883,\n    \"stddev_ns\": 16174325,\n    \"avg_ts\": 16.647349,\n    \"stddev_ts\": 0.008752,\n    \"samples_ns\": [ 30774139683, 30744110030, 30748708936 ],\n    \"samples_ts\": [ 16.6373, 16.6536, 16.6511 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:21:51Z\",\n    \"avg_ns\": 47976494641,\n    \"stddev_ns\": 34139327,\n    \"avg_ts\": 2.667974,\n    \"stddev_ts\": 0.001898,\n    \"samples_ns\": [ 47955455163, 47958145661, 48015883101 ],\n    \"samples_ts\": [ 2.66914, 2.66899, 2.66578 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:19:48Z",
+          "avg_ns": 30755652883,
+          "stddev_ns": 16174325,
+          "avg_ts": 16.647349,
+          "stddev_ts": 0.008752,
+          "samples_ns": [
+            30774139683,
+            30744110030,
+            30748708936
+          ],
+          "samples_ts": [
+            16.6373,
+            16.6536,
+            16.6511
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:21:51Z",
+          "avg_ns": 47976494641,
+          "stddev_ns": 34139327,
+          "avg_ts": 2.667974,
+          "stddev_ts": 0.001898,
+          "samples_ns": [
+            47955455163,
+            47958145661,
+            48015883101
+          ],
+          "samples_ts": [
+            2.66914,
+            2.66899,
+            2.66578
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1146
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:35:59.950979+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:24:17Z\",\n    \"avg_ns\": 30819536973,\n    \"stddev_ns\": 9018194,\n    \"avg_ts\": 16.612840,\n    \"stddev_ts\": 0.004860,\n    \"samples_ns\": [ 30825600032, 30823833178, 30809177711 ],\n    \"samples_ts\": [ 16.6096, 16.6105, 16.6184 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:26:20Z\",\n    \"avg_ns\": 192924108227,\n    \"stddev_ns\": 48506150,\n    \"avg_ts\": 2.653893,\n    \"stddev_ts\": 0.000667,\n    \"samples_ns\": [ 192971443895, 192874510728, 192926370058 ],\n    \"samples_ts\": [ 2.65324, 2.65458, 2.65386 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:24:17Z",
+          "avg_ns": 30819536973,
+          "stddev_ns": 9018194,
+          "avg_ts": 16.61284,
+          "stddev_ts": 0.00486,
+          "samples_ns": [
+            30825600032,
+            30823833178,
+            30809177711
+          ],
+          "samples_ts": [
+            16.6096,
+            16.6105,
+            16.6184
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:26:20Z",
+          "avg_ns": 192924108227,
+          "stddev_ns": 48506150,
+          "avg_ts": 2.653893,
+          "stddev_ts": 0.000667,
+          "samples_ns": [
+            192971443895,
+            192874510728,
+            192926370058
+          ],
+          "samples_ts": [
+            2.65324,
+            2.65458,
+            2.65386
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1147
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:38:56.146978+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:36:01Z\",\n    \"avg_ns\": 7631657699,\n    \"stddev_ns\": 3019975,\n    \"avg_ts\": 16.772242,\n    \"stddev_ts\": 0.006638,\n    \"samples_ns\": [ 7632202234, 7628402503, 7634368360 ],\n    \"samples_ts\": [ 16.771, 16.7794, 16.7663 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:36:31Z\",\n    \"avg_ns\": 48014076847,\n    \"stddev_ns\": 11888240,\n    \"avg_ts\": 2.665885,\n    \"stddev_ts\": 0.000660,\n    \"samples_ns\": [ 48006505533, 48007946020, 48027778988 ],\n    \"samples_ts\": [ 2.66631, 2.66623, 2.66512 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:36:01Z",
+          "avg_ns": 7631657699,
+          "stddev_ns": 3019975,
+          "avg_ts": 16.772242,
+          "stddev_ts": 0.006638,
+          "samples_ns": [
+            7632202234,
+            7628402503,
+            7634368360
+          ],
+          "samples_ts": [
+            16.771,
+            16.7794,
+            16.7663
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:36:31Z",
+          "avg_ns": 48014076847,
+          "stddev_ns": 11888240,
+          "avg_ts": 2.665885,
+          "stddev_ts": 0.00066,
+          "samples_ns": [
+            48006505533,
+            48007946020,
+            48027778988
+          ],
+          "samples_ts": [
+            2.66631,
+            2.66623,
+            2.66512
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1148
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:49:07.507729+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:38:57Z\",\n    \"avg_ns\": 7636872804,\n    \"stddev_ns\": 7166388,\n    \"avg_ts\": 16.760797,\n    \"stddev_ts\": 0.015720,\n    \"samples_ns\": [ 7632558298, 7632914838, 7645145276 ],\n    \"samples_ts\": [ 16.7703, 16.7695, 16.7427 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:39:27Z\",\n    \"avg_ns\": 193053031254,\n    \"stddev_ns\": 37147177,\n    \"avg_ts\": 2.652121,\n    \"stddev_ts\": 0.000510,\n    \"samples_ns\": [ 193012048525, 193084487323, 193062557914 ],\n    \"samples_ts\": [ 2.65268, 2.65169, 2.65199 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:38:57Z",
+          "avg_ns": 7636872804,
+          "stddev_ns": 7166388,
+          "avg_ts": 16.760797,
+          "stddev_ts": 0.01572,
+          "samples_ns": [
+            7632558298,
+            7632914838,
+            7645145276
+          ],
+          "samples_ts": [
+            16.7703,
+            16.7695,
+            16.7427
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:39:27Z",
+          "avg_ns": 193053031254,
+          "stddev_ns": 37147177,
+          "avg_ts": 2.652121,
+          "stddev_ts": 0.00051,
+          "samples_ns": [
+            193012048525,
+            193084487323,
+            193062557914
+          ],
+          "samples_ts": [
+            2.65268,
+            2.65169,
+            2.65199
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1149
+    },
+    {
+      "timestamp_utc": "2025-12-11T02:53:38.764355+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:49:08Z\",\n    \"avg_ns\": 31442352646,\n    \"stddev_ns\": 9048778,\n    \"avg_ts\": 16.283770,\n    \"stddev_ts\": 0.004686,\n    \"samples_ns\": [ 31433265566, 31451362631, 31442429741 ],\n    \"samples_ts\": [ 16.2885, 16.2791, 16.2837 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:51:14Z\",\n    \"avg_ns\": 47968698482,\n    \"stddev_ns\": 54492095,\n    \"avg_ts\": 2.668409,\n    \"stddev_ts\": 0.003030,\n    \"samples_ns\": [ 48030627893, 47928097718, 47947369837 ],\n    \"samples_ts\": [ 2.66497, 2.67067, 2.66959 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:49:08Z",
+          "avg_ns": 31442352646,
+          "stddev_ns": 9048778,
+          "avg_ts": 16.28377,
+          "stddev_ts": 0.004686,
+          "samples_ns": [
+            31433265566,
+            31451362631,
+            31442429741
+          ],
+          "samples_ts": [
+            16.2885,
+            16.2791,
+            16.2837
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:51:14Z",
+          "avg_ns": 47968698482,
+          "stddev_ns": 54492095,
+          "avg_ts": 2.668409,
+          "stddev_ts": 0.00303,
+          "samples_ns": [
+            48030627893,
+            47928097718,
+            47947369837
+          ],
+          "samples_ts": [
+            2.66497,
+            2.67067,
+            2.66959
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1150
+    },
+    {
+      "timestamp_utc": "2025-12-11T03:05:25.717734+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:53:39Z\",\n    \"avg_ns\": 31306520331,\n    \"stddev_ns\": 5785420,\n    \"avg_ts\": 16.354421,\n    \"stddev_ts\": 0.003022,\n    \"samples_ns\": [ 31302921343, 31313193894, 31303445756 ],\n    \"samples_ts\": [ 16.3563, 16.3509, 16.356 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n    \"model_type\": \"gemma3 4B Q8_0\",\n    \"model_size\": 4123860992,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T02:55:45Z\",\n    \"avg_ns\": 193341454172,\n    \"stddev_ns\": 97847045,\n    \"avg_ts\": 2.648165,\n    \"stddev_ts\": 0.001340,\n    \"samples_ns\": [ 193440329910, 193339359193, 193244673415 ],\n    \"samples_ts\": [ 2.64681, 2.64819, 2.64949 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:53:39Z",
+          "avg_ns": 31306520331,
+          "stddev_ns": 5785420,
+          "avg_ts": 16.354421,
+          "stddev_ts": 0.003022,
+          "samples_ns": [
+            31302921343,
+            31313193894,
+            31303445756
+          ],
+          "samples_ts": [
+            16.3563,
+            16.3509,
+            16.356
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+          "model_type": "gemma3 4B Q8_0",
+          "model_size": 4123860992,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T02:55:45Z",
+          "avg_ns": 193341454172,
+          "stddev_ns": 97847045,
+          "avg_ts": 2.648165,
+          "stddev_ts": 0.00134,
+          "samples_ns": [
+            193440329910,
+            193339359193,
+            193244673415
+          ],
+          "samples_ts": [
+            2.64681,
+            2.64819,
+            2.64949
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q8_0",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1151
+    },
+    {
+      "timestamp_utc": "2025-12-11T03:12:26.488983+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:05:47Z\",\n    \"avg_ns\": 50262873123,\n    \"stddev_ns\": 1220963,\n    \"avg_ts\": 2.546611,\n    \"stddev_ts\": 0.000062,\n    \"samples_ns\": [ 50263912551, 50263178299, 50261528519 ],\n    \"samples_ts\": [ 2.54656, 2.5466, 2.54668 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:09:08Z\",\n    \"avg_ns\": 65849611514,\n    \"stddev_ns\": 13903673,\n    \"avg_ts\": 1.943823,\n    \"stddev_ts\": 0.000410,\n    \"samples_ns\": [ 65865657522, 65841134231, 65842042789 ],\n    \"samples_ts\": [ 1.94335, 1.94407, 1.94405 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:05:47Z",
+          "avg_ns": 50262873123,
+          "stddev_ns": 1220963,
+          "avg_ts": 2.546611,
+          "stddev_ts": 6.2e-05,
+          "samples_ns": [
+            50263912551,
+            50263178299,
+            50261528519
+          ],
+          "samples_ts": [
+            2.54656,
+            2.5466,
+            2.54668
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:09:08Z",
+          "avg_ns": 65849611514,
+          "stddev_ns": 13903673,
+          "avg_ts": 1.943823,
+          "stddev_ts": 0.00041,
+          "samples_ns": [
+            65865657522,
+            65841134231,
+            65842042789
+          ],
+          "samples_ts": [
+            1.94335,
+            1.94407,
+            1.94405
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1152
+    },
+    {
+      "timestamp_utc": "2025-12-11T03:29:08.283334+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:12:27Z\",\n    \"avg_ns\": 50240595039,\n    \"stddev_ns\": 518669,\n    \"avg_ts\": 2.547741,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 50240416958, 50241125660, 50240242500 ],\n    \"samples_ts\": [ 2.54775, 2.54771, 2.54776 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:15:48Z\",\n    \"avg_ns\": 266350634154,\n    \"stddev_ns\": 3410962,\n    \"avg_ts\": 1.922278,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 266354395651, 266348053174, 266349453639 ],\n    \"samples_ts\": [ 1.92225, 1.9223, 1.92229 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:12:27Z",
+          "avg_ns": 50240595039,
+          "stddev_ns": 518669,
+          "avg_ts": 2.547741,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            50240416958,
+            50241125660,
+            50240242500
+          ],
+          "samples_ts": [
+            2.54775,
+            2.54771,
+            2.54776
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:15:48Z",
+          "avg_ns": 266350634154,
+          "stddev_ns": 3410962,
+          "avg_ts": 1.922278,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            266354395651,
+            266348053174,
+            266349453639
+          ],
+          "samples_ts": [
+            1.92225,
+            1.9223,
+            1.92229
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1153
+    },
+    {
+      "timestamp_utc": "2025-12-11T03:45:53.789061+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:29:09Z\",\n    \"avg_ns\": 201481637539,\n    \"stddev_ns\": 453928,\n    \"avg_ts\": 2.541175,\n    \"stddev_ts\": 0.000001,\n    \"samples_ns\": [ 201481612552, 201481714073, 201481585993 ],\n    \"samples_ts\": [ 2.54117, 2.54117, 2.54118 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:42:35Z\",\n    \"avg_ns\": 65962037292,\n    \"stddev_ns\": 8225890,\n    \"avg_ts\": 1.940510,\n    \"stddev_ts\": 0.000242,\n    \"samples_ns\": [ 65970061240, 65953630833, 65962419804 ],\n    \"samples_ts\": [ 1.94027, 1.94076, 1.9405 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:29:09Z",
+          "avg_ns": 201481637539,
+          "stddev_ns": 453928,
+          "avg_ts": 2.541175,
+          "stddev_ts": 1e-06,
+          "samples_ns": [
+            201481612552,
+            201481714073,
+            201481585993
+          ],
+          "samples_ts": [
+            2.54117,
+            2.54117,
+            2.54118
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:42:35Z",
+          "avg_ns": 65962037292,
+          "stddev_ns": 8225890,
+          "avg_ts": 1.94051,
+          "stddev_ts": 0.000242,
+          "samples_ns": [
+            65970061240,
+            65953630833,
+            65962419804
+          ],
+          "samples_ts": [
+            1.94027,
+            1.94076,
+            1.9405
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1154
+    },
+    {
+      "timestamp_utc": "2025-12-11T04:12:38.115698+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:45:54Z\",\n    \"avg_ns\": 201500254016,\n    \"stddev_ns\": 1405445,\n    \"avg_ts\": 2.540940,\n    \"stddev_ts\": 0.000017,\n    \"samples_ns\": [ 201500967397, 201498717456, 201501077196 ],\n    \"samples_ts\": [ 2.54093, 2.54096, 2.54093 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T03:59:20Z\",\n    \"avg_ns\": 265539531337,\n    \"stddev_ns\": 884980,\n    \"avg_ts\": 1.928150,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 265539595780, 265539998112, 265539000121 ],\n    \"samples_ts\": [ 1.92815, 1.92815, 1.92815 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:45:54Z",
+          "avg_ns": 201500254016,
+          "stddev_ns": 1405445,
+          "avg_ts": 2.54094,
+          "stddev_ts": 1.7e-05,
+          "samples_ns": [
+            201500967397,
+            201498717456,
+            201501077196
+          ],
+          "samples_ts": [
+            2.54093,
+            2.54096,
+            2.54093
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T03:59:20Z",
+          "avg_ns": 265539531337,
+          "stddev_ns": 884980,
+          "avg_ts": 1.92815,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            265539595780,
+            265539998112,
+            265539000121
+          ],
+          "samples_ts": [
+            1.92815,
+            1.92815,
+            1.92815
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1155
+    },
+    {
+      "timestamp_utc": "2025-12-11T04:19:19.058150+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:12:39Z\",\n    \"avg_ns\": 50247178934,\n    \"stddev_ns\": 431180,\n    \"avg_ts\": 2.547407,\n    \"stddev_ts\": 0.000015,\n    \"samples_ns\": [ 50247257661, 50247423779, 50246855364 ],\n    \"samples_ts\": [ 2.5474, 2.54739, 2.54742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:16:00Z\",\n    \"avg_ns\": 65956019854,\n    \"stddev_ns\": 3685177,\n    \"avg_ts\": 1.940687,\n    \"stddev_ts\": 0.000108,\n    \"samples_ns\": [ 65960263571, 65953626550, 65954169441 ],\n    \"samples_ts\": [ 1.94056, 1.94076, 1.94074 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:12:39Z",
+          "avg_ns": 50247178934,
+          "stddev_ns": 431180,
+          "avg_ts": 2.547407,
+          "stddev_ts": 1.5e-05,
+          "samples_ns": [
+            50247257661,
+            50247423779,
+            50246855364
+          ],
+          "samples_ts": [
+            2.5474,
+            2.54739,
+            2.54742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:16:00Z",
+          "avg_ns": 65956019854,
+          "stddev_ns": 3685177,
+          "avg_ts": 1.940687,
+          "stddev_ts": 0.000108,
+          "samples_ns": [
+            65960263571,
+            65953626550,
+            65954169441
+          ],
+          "samples_ts": [
+            1.94056,
+            1.94076,
+            1.94074
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1156
+    },
+    {
+      "timestamp_utc": "2025-12-11T04:36:01.081694+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:19:20Z\",\n    \"avg_ns\": 50251180188,\n    \"stddev_ns\": 128319,\n    \"avg_ts\": 2.547204,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 50251273229, 50251233535, 50251033800 ],\n    \"samples_ts\": [ 2.5472, 2.5472, 2.54721 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:22:41Z\",\n    \"avg_ns\": 266388024599,\n    \"stddev_ns\": 4962900,\n    \"avg_ts\": 1.922008,\n    \"stddev_ts\": 0.000036,\n    \"samples_ns\": [ 266391886585, 266382426976, 266389760236 ],\n    \"samples_ts\": [ 1.92198, 1.92205, 1.922 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:19:20Z",
+          "avg_ns": 50251180188,
+          "stddev_ns": 128319,
+          "avg_ts": 2.547204,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            50251273229,
+            50251233535,
+            50251033800
+          ],
+          "samples_ts": [
+            2.5472,
+            2.5472,
+            2.54721
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:22:41Z",
+          "avg_ns": 266388024599,
+          "stddev_ns": 4962900,
+          "avg_ts": 1.922008,
+          "stddev_ts": 3.6e-05,
+          "samples_ns": [
+            266391886585,
+            266382426976,
+            266389760236
+          ],
+          "samples_ts": [
+            1.92198,
+            1.92205,
+            1.922
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1157
+    },
+    {
+      "timestamp_utc": "2025-12-11T04:52:47.280657+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:36:02Z\",\n    \"avg_ns\": 201634377368,\n    \"stddev_ns\": 908630,\n    \"avg_ts\": 2.539250,\n    \"stddev_ts\": 0.000010,\n    \"samples_ns\": [ 201634438449, 201635134976, 201633558680 ],\n    \"samples_ts\": [ 2.53925, 2.53924, 2.53926 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:49:28Z\",\n    \"avg_ns\": 65983282468,\n    \"stddev_ns\": 4798426,\n    \"avg_ts\": 1.939885,\n    \"stddev_ts\": 0.000141,\n    \"samples_ns\": [ 65988587190, 65981967224, 65979292992 ],\n    \"samples_ts\": [ 1.93973, 1.93992, 1.94 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:36:02Z",
+          "avg_ns": 201634377368,
+          "stddev_ns": 908630,
+          "avg_ts": 2.53925,
+          "stddev_ts": 1e-05,
+          "samples_ns": [
+            201634438449,
+            201635134976,
+            201633558680
+          ],
+          "samples_ts": [
+            2.53925,
+            2.53924,
+            2.53926
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:49:28Z",
+          "avg_ns": 65983282468,
+          "stddev_ns": 4798426,
+          "avg_ts": 1.939885,
+          "stddev_ts": 0.000141,
+          "samples_ns": [
+            65988587190,
+            65981967224,
+            65979292992
+          ],
+          "samples_ts": [
+            1.93973,
+            1.93992,
+            1.94
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1158
+    },
+    {
+      "timestamp_utc": "2025-12-11T05:19:33.619223+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T04:52:48Z\",\n    \"avg_ns\": 201581373904,\n    \"stddev_ns\": 689591,\n    \"avg_ts\": 2.539917,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 201582155500, 201580851322, 201581114890 ],\n    \"samples_ts\": [ 2.53991, 2.53992, 2.53992 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:06:14Z\",\n    \"avg_ns\": 266091113868,\n    \"stddev_ns\": 6334409,\n    \"avg_ts\": 1.924153,\n    \"stddev_ts\": 0.000045,\n    \"samples_ns\": [ 266087809796, 266098369911, 266087161899 ],\n    \"samples_ts\": [ 1.92418, 1.9241, 1.92418 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T04:52:48Z",
+          "avg_ns": 201581373904,
+          "stddev_ns": 689591,
+          "avg_ts": 2.539917,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            201582155500,
+            201580851322,
+            201581114890
+          ],
+          "samples_ts": [
+            2.53991,
+            2.53992,
+            2.53992
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:06:14Z",
+          "avg_ns": 266091113868,
+          "stddev_ns": 6334409,
+          "avg_ts": 1.924153,
+          "stddev_ts": 4.5e-05,
+          "samples_ns": [
+            266087809796,
+            266098369911,
+            266087161899
+          ],
+          "samples_ts": [
+            1.92418,
+            1.9241,
+            1.92418
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1159
+    },
+    {
+      "timestamp_utc": "2025-12-11T05:26:13.892815+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:19:34Z\",\n    \"avg_ns\": 50247724046,\n    \"stddev_ns\": 522448,\n    \"avg_ts\": 2.547379,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 50248080326, 50247823857, 50247267957 ],\n    \"samples_ts\": [ 2.54736, 2.54737, 2.5474 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:22:55Z\",\n    \"avg_ns\": 65851949878,\n    \"stddev_ns\": 3766169,\n    \"avg_ts\": 1.943754,\n    \"stddev_ts\": 0.000111,\n    \"samples_ns\": [ 65856274788, 65850086667, 65849488180 ],\n    \"samples_ts\": [ 1.94363, 1.94381, 1.94383 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:19:34Z",
+          "avg_ns": 50247724046,
+          "stddev_ns": 522448,
+          "avg_ts": 2.547379,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            50248080326,
+            50247823857,
+            50247267957
+          ],
+          "samples_ts": [
+            2.54736,
+            2.54737,
+            2.5474
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:22:55Z",
+          "avg_ns": 65851949878,
+          "stddev_ns": 3766169,
+          "avg_ts": 1.943754,
+          "stddev_ts": 0.000111,
+          "samples_ns": [
+            65856274788,
+            65850086667,
+            65849488180
+          ],
+          "samples_ts": [
+            1.94363,
+            1.94381,
+            1.94383
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1160
+    },
+    {
+      "timestamp_utc": "2025-12-11T05:42:53.901315+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:26:14Z\",\n    \"avg_ns\": 50246384184,\n    \"stddev_ns\": 656442,\n    \"avg_ts\": 2.547447,\n    \"stddev_ts\": 0.000029,\n    \"samples_ns\": [ 50246069250, 50246035652, 50247047652 ],\n    \"samples_ts\": [ 2.54746, 2.54746, 2.54741 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:29:35Z\",\n    \"avg_ns\": 265767318009,\n    \"stddev_ns\": 2557920,\n    \"avg_ts\": 1.926497,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 265767782796, 265769504170, 265764667063 ],\n    \"samples_ts\": [ 1.92649, 1.92648, 1.92652 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:26:14Z",
+          "avg_ns": 50246384184,
+          "stddev_ns": 656442,
+          "avg_ts": 2.547447,
+          "stddev_ts": 2.9e-05,
+          "samples_ns": [
+            50246069250,
+            50246035652,
+            50247047652
+          ],
+          "samples_ts": [
+            2.54746,
+            2.54746,
+            2.54741
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:29:35Z",
+          "avg_ns": 265767318009,
+          "stddev_ns": 2557920,
+          "avg_ts": 1.926497,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            265767782796,
+            265769504170,
+            265764667063
+          ],
+          "samples_ts": [
+            1.92649,
+            1.92648,
+            1.92652
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1161
+    },
+    {
+      "timestamp_utc": "2025-12-11T05:59:43.792056+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:42:54Z\",\n    \"avg_ns\": 202574444264,\n    \"stddev_ns\": 2867625,\n    \"avg_ts\": 2.527466,\n    \"stddev_ts\": 0.000035,\n    \"samples_ns\": [ 202574105079, 202571797047, 202577430667 ],\n    \"samples_ts\": [ 2.52747, 2.5275, 2.52743 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:56:25Z\",\n    \"avg_ns\": 65954134881,\n    \"stddev_ns\": 1638403,\n    \"avg_ts\": 1.940743,\n    \"stddev_ts\": 0.000047,\n    \"samples_ns\": [ 65955858062, 65953843770, 65952702813 ],\n    \"samples_ts\": [ 1.94069, 1.94075, 1.94078 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:42:54Z",
+          "avg_ns": 202574444264,
+          "stddev_ns": 2867625,
+          "avg_ts": 2.527466,
+          "stddev_ts": 3.5e-05,
+          "samples_ns": [
+            202574105079,
+            202571797047,
+            202577430667
+          ],
+          "samples_ts": [
+            2.52747,
+            2.5275,
+            2.52743
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:56:25Z",
+          "avg_ns": 65954134881,
+          "stddev_ns": 1638403,
+          "avg_ts": 1.940743,
+          "stddev_ts": 4.7e-05,
+          "samples_ns": [
+            65955858062,
+            65953843770,
+            65952702813
+          ],
+          "samples_ts": [
+            1.94069,
+            1.94075,
+            1.94078
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1162
+    },
+    {
+      "timestamp_utc": "2025-12-11T06:26:33.685703+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T05:59:44Z\",\n    \"avg_ns\": 202607043434,\n    \"stddev_ns\": 1648051,\n    \"avg_ts\": 2.527059,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 202608873793, 202606579268, 202605677241 ],\n    \"samples_ts\": [ 2.52704, 2.52707, 2.52708 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:13:15Z\",\n    \"avg_ns\": 265912219029,\n    \"stddev_ns\": 5669278,\n    \"avg_ts\": 1.925447,\n    \"stddev_ts\": 0.000041,\n    \"samples_ns\": [ 265918764342, 265908846975, 265909045770 ],\n    \"samples_ts\": [ 1.9254, 1.92547, 1.92547 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T05:59:44Z",
+          "avg_ns": 202607043434,
+          "stddev_ns": 1648051,
+          "avg_ts": 2.527059,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            202608873793,
+            202606579268,
+            202605677241
+          ],
+          "samples_ts": [
+            2.52704,
+            2.52707,
+            2.52708
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:13:15Z",
+          "avg_ns": 265912219029,
+          "stddev_ns": 5669278,
+          "avg_ts": 1.925447,
+          "stddev_ts": 4.1e-05,
+          "samples_ns": [
+            265918764342,
+            265908846975,
+            265909045770
+          ],
+          "samples_ts": [
+            1.9254,
+            1.92547,
+            1.92547
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1163
+    },
+    {
+      "timestamp_utc": "2025-12-11T06:33:14.151419+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:26:34Z\",\n    \"avg_ns\": 50241566402,\n    \"stddev_ns\": 210171,\n    \"avg_ts\": 2.547691,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 50241773682, 50241572070, 50241353454 ],\n    \"samples_ts\": [ 2.54768, 2.54769, 2.5477 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:29:55Z\",\n    \"avg_ns\": 65930694553,\n    \"stddev_ns\": 763348,\n    \"avg_ts\": 1.941433,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 65931370647, 65930773572, 65929939441 ],\n    \"samples_ts\": [ 1.94141, 1.94143, 1.94145 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:26:34Z",
+          "avg_ns": 50241566402,
+          "stddev_ns": 210171,
+          "avg_ts": 2.547691,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            50241773682,
+            50241572070,
+            50241353454
+          ],
+          "samples_ts": [
+            2.54768,
+            2.54769,
+            2.5477
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:29:55Z",
+          "avg_ns": 65930694553,
+          "stddev_ns": 763348,
+          "avg_ts": 1.941433,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            65931370647,
+            65930773572,
+            65929939441
+          ],
+          "samples_ts": [
+            1.94141,
+            1.94143,
+            1.94145
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1164
+    },
+    {
+      "timestamp_utc": "2025-12-11T06:49:54.376308+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:33:15Z\",\n    \"avg_ns\": 50248990210,\n    \"stddev_ns\": 240012,\n    \"avg_ts\": 2.547315,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 50248959554, 50248923977, 50249087100 ],\n    \"samples_ts\": [ 2.54732, 2.54732, 2.54731 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:36:36Z\",\n    \"avg_ns\": 265825232554,\n    \"stddev_ns\": 3153885,\n    \"avg_ts\": 1.926078,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 265821631904, 265826560137, 265827505621 ],\n    \"samples_ts\": [ 1.9261, 1.92607, 1.92606 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:33:15Z",
+          "avg_ns": 50248990210,
+          "stddev_ns": 240012,
+          "avg_ts": 2.547315,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            50248959554,
+            50248923977,
+            50249087100
+          ],
+          "samples_ts": [
+            2.54732,
+            2.54732,
+            2.54731
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:36:36Z",
+          "avg_ns": 265825232554,
+          "stddev_ns": 3153885,
+          "avg_ts": 1.926078,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            265821631904,
+            265826560137,
+            265827505621
+          ],
+          "samples_ts": [
+            1.9261,
+            1.92607,
+            1.92606
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1165
+    },
+    {
+      "timestamp_utc": "2025-12-11T07:06:39.634231+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T06:49:55Z\",\n    \"avg_ns\": 201487603580,\n    \"stddev_ns\": 1842272,\n    \"avg_ts\": 2.541099,\n    \"stddev_ts\": 0.000022,\n    \"samples_ns\": [ 201487668847, 201485842423, 201489299472 ],\n    \"samples_ts\": [ 2.5411, 2.54112, 2.54108 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:03:21Z\",\n    \"avg_ns\": 65858469206,\n    \"stddev_ns\": 3773718,\n    \"avg_ts\": 1.943562,\n    \"stddev_ts\": 0.000111,\n    \"samples_ns\": [ 65862189969, 65858539157, 65854678494 ],\n    \"samples_ts\": [ 1.94345, 1.94356, 1.94367 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T06:49:55Z",
+          "avg_ns": 201487603580,
+          "stddev_ns": 1842272,
+          "avg_ts": 2.541099,
+          "stddev_ts": 2.2e-05,
+          "samples_ns": [
+            201487668847,
+            201485842423,
+            201489299472
+          ],
+          "samples_ts": [
+            2.5411,
+            2.54112,
+            2.54108
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:03:21Z",
+          "avg_ns": 65858469206,
+          "stddev_ns": 3773718,
+          "avg_ts": 1.943562,
+          "stddev_ts": 0.000111,
+          "samples_ns": [
+            65862189969,
+            65858539157,
+            65854678494
+          ],
+          "samples_ts": [
+            1.94345,
+            1.94356,
+            1.94367
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1166
+    },
+    {
+      "timestamp_utc": "2025-12-11T07:33:25.739663+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:06:40Z\",\n    \"avg_ns\": 201537651881,\n    \"stddev_ns\": 1720622,\n    \"avg_ts\": 2.540468,\n    \"stddev_ts\": 0.000020,\n    \"samples_ns\": [ 201536896668, 201536570137, 201539488840 ],\n    \"samples_ts\": [ 2.54048, 2.54048, 2.54045 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:20:06Z\",\n    \"avg_ns\": 266076915455,\n    \"stddev_ns\": 1264792,\n    \"avg_ts\": 1.924256,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 266076323352, 266076314509, 266078108506 ],\n    \"samples_ts\": [ 1.92426, 1.92426, 1.92425 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:06:40Z",
+          "avg_ns": 201537651881,
+          "stddev_ns": 1720622,
+          "avg_ts": 2.540468,
+          "stddev_ts": 2e-05,
+          "samples_ns": [
+            201536896668,
+            201536570137,
+            201539488840
+          ],
+          "samples_ts": [
+            2.54048,
+            2.54048,
+            2.54045
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:20:06Z",
+          "avg_ns": 266076915455,
+          "stddev_ns": 1264792,
+          "avg_ts": 1.924256,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            266076323352,
+            266076314509,
+            266078108506
+          ],
+          "samples_ts": [
+            1.92426,
+            1.92426,
+            1.92425
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1167
+    },
+    {
+      "timestamp_utc": "2025-12-11T07:40:06.216790+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:33:26Z\",\n    \"avg_ns\": 50250056431,\n    \"stddev_ns\": 983014,\n    \"avg_ts\": 2.547261,\n    \"stddev_ts\": 0.000047,\n    \"samples_ns\": [ 50248992983, 50250455393, 50250720919 ],\n    \"samples_ts\": [ 2.54731, 2.54724, 2.54723 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:36:47Z\",\n    \"avg_ns\": 65926078049,\n    \"stddev_ns\": 2984169,\n    \"avg_ts\": 1.941569,\n    \"stddev_ts\": 0.000088,\n    \"samples_ns\": [ 65925615537, 65929255303, 65923363308 ],\n    \"samples_ts\": [ 1.94158, 1.94147, 1.94165 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:33:26Z",
+          "avg_ns": 50250056431,
+          "stddev_ns": 983014,
+          "avg_ts": 2.547261,
+          "stddev_ts": 4.7e-05,
+          "samples_ns": [
+            50248992983,
+            50250455393,
+            50250720919
+          ],
+          "samples_ts": [
+            2.54731,
+            2.54724,
+            2.54723
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:36:47Z",
+          "avg_ns": 65926078049,
+          "stddev_ns": 2984169,
+          "avg_ts": 1.941569,
+          "stddev_ts": 8.8e-05,
+          "samples_ns": [
+            65925615537,
+            65929255303,
+            65923363308
+          ],
+          "samples_ts": [
+            1.94158,
+            1.94147,
+            1.94165
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1168
+    },
+    {
+      "timestamp_utc": "2025-12-11T07:56:47.059503+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:40:07Z\",\n    \"avg_ns\": 50241992884,\n    \"stddev_ns\": 439646,\n    \"avg_ts\": 2.547670,\n    \"stddev_ts\": 0.000015,\n    \"samples_ns\": [ 50241889699, 50242335716, 50241753239 ],\n    \"samples_ts\": [ 2.54767, 2.54765, 2.54768 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:43:28Z\",\n    \"avg_ns\": 265946272927,\n    \"stddev_ns\": 2796153,\n    \"avg_ts\": 1.925201,\n    \"stddev_ts\": 0.000020,\n    \"samples_ns\": [ 265943421191, 265946609172, 265948788420 ],\n    \"samples_ts\": [ 1.92522, 1.9252, 1.92518 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:40:07Z",
+          "avg_ns": 50241992884,
+          "stddev_ns": 439646,
+          "avg_ts": 2.54767,
+          "stddev_ts": 1.5e-05,
+          "samples_ns": [
+            50241889699,
+            50242335716,
+            50241753239
+          ],
+          "samples_ts": [
+            2.54767,
+            2.54765,
+            2.54768
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:43:28Z",
+          "avg_ns": 265946272927,
+          "stddev_ns": 2796153,
+          "avg_ts": 1.925201,
+          "stddev_ts": 2e-05,
+          "samples_ns": [
+            265943421191,
+            265946609172,
+            265948788420
+          ],
+          "samples_ts": [
+            1.92522,
+            1.9252,
+            1.92518
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1169
+    },
+    {
+      "timestamp_utc": "2025-12-11T08:13:32.677796+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T07:56:48Z\",\n    \"avg_ns\": 201623291119,\n    \"stddev_ns\": 2701514,\n    \"avg_ts\": 2.539389,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 201624849426, 201624764453, 201620259480 ],\n    \"samples_ts\": [ 2.53937, 2.53937, 2.53943 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:10:14Z\",\n    \"avg_ns\": 65804056958,\n    \"stddev_ns\": 23604465,\n    \"avg_ts\": 1.945169,\n    \"stddev_ts\": 0.000698,\n    \"samples_ns\": [ 65831306229, 65790815932, 65790048715 ],\n    \"samples_ts\": [ 1.94436, 1.94556, 1.94558 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T07:56:48Z",
+          "avg_ns": 201623291119,
+          "stddev_ns": 2701514,
+          "avg_ts": 2.539389,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            201624849426,
+            201624764453,
+            201620259480
+          ],
+          "samples_ts": [
+            2.53937,
+            2.53937,
+            2.53943
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:10:14Z",
+          "avg_ns": 65804056958,
+          "stddev_ns": 23604465,
+          "avg_ts": 1.945169,
+          "stddev_ts": 0.000698,
+          "samples_ns": [
+            65831306229,
+            65790815932,
+            65790048715
+          ],
+          "samples_ts": [
+            1.94436,
+            1.94556,
+            1.94558
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1170
+    },
+    {
+      "timestamp_utc": "2025-12-11T08:40:18.478147+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:13:33Z\",\n    \"avg_ns\": 201578158823,\n    \"stddev_ns\": 669305,\n    \"avg_ts\": 2.539958,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 201578209804, 201578340370, 201577926297 ],\n    \"samples_ts\": [ 2.53996, 2.53996, 2.53996 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:27:00Z\",\n    \"avg_ns\": 265914795795,\n    \"stddev_ns\": 1840999,\n    \"avg_ts\": 1.925429,\n    \"stddev_ts\": 0.000013,\n    \"samples_ns\": [ 265914453024, 265913224986, 265916709376 ],\n    \"samples_ts\": [ 1.92543, 1.92544, 1.92541 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:13:33Z",
+          "avg_ns": 201578158823,
+          "stddev_ns": 669305,
+          "avg_ts": 2.539958,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            201578209804,
+            201578340370,
+            201577926297
+          ],
+          "samples_ts": [
+            2.53996,
+            2.53996,
+            2.53996
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:27:00Z",
+          "avg_ns": 265914795795,
+          "stddev_ns": 1840999,
+          "avg_ts": 1.925429,
+          "stddev_ts": 1.3e-05,
+          "samples_ns": [
+            265914453024,
+            265913224986,
+            265916709376
+          ],
+          "samples_ts": [
+            1.92543,
+            1.92544,
+            1.92541
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1171
+    },
+    {
+      "timestamp_utc": "2025-12-11T08:46:59.009876+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:40:19Z\",\n    \"avg_ns\": 50252765970,\n    \"stddev_ns\": 1089200,\n    \"avg_ts\": 2.547123,\n    \"stddev_ts\": 0.000055,\n    \"samples_ns\": [ 50251539724, 50253137022, 50253621164 ],\n    \"samples_ts\": [ 2.54719, 2.5471, 2.54708 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:43:40Z\",\n    \"avg_ns\": 65940722462,\n    \"stddev_ns\": 1912051,\n    \"avg_ts\": 1.941137,\n    \"stddev_ts\": 0.000056,\n    \"samples_ns\": [ 65942565557, 65938748207, 65940853622 ],\n    \"samples_ts\": [ 1.94108, 1.9412, 1.94113 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:40:19Z",
+          "avg_ns": 50252765970,
+          "stddev_ns": 1089200,
+          "avg_ts": 2.547123,
+          "stddev_ts": 5.5e-05,
+          "samples_ns": [
+            50251539724,
+            50253137022,
+            50253621164
+          ],
+          "samples_ts": [
+            2.54719,
+            2.5471,
+            2.54708
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:43:40Z",
+          "avg_ns": 65940722462,
+          "stddev_ns": 1912051,
+          "avg_ts": 1.941137,
+          "stddev_ts": 5.6e-05,
+          "samples_ns": [
+            65942565557,
+            65938748207,
+            65940853622
+          ],
+          "samples_ts": [
+            1.94108,
+            1.9412,
+            1.94113
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1172
+    },
+    {
+      "timestamp_utc": "2025-12-11T09:03:39.654348+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:47:00Z\",\n    \"avg_ns\": 50256585379,\n    \"stddev_ns\": 538893,\n    \"avg_ts\": 2.546930,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 50256433527, 50256189230, 50257133381 ],\n    \"samples_ts\": [ 2.54694, 2.54695, 2.5469 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T08:50:21Z\",\n    \"avg_ns\": 265968179655,\n    \"stddev_ns\": 4304066,\n    \"avg_ts\": 1.925042,\n    \"stddev_ts\": 0.000031,\n    \"samples_ns\": [ 265970803440, 265970523142, 265963212383 ],\n    \"samples_ts\": [ 1.92502, 1.92503, 1.92508 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:47:00Z",
+          "avg_ns": 50256585379,
+          "stddev_ns": 538893,
+          "avg_ts": 2.54693,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            50256433527,
+            50256189230,
+            50257133381
+          ],
+          "samples_ts": [
+            2.54694,
+            2.54695,
+            2.5469
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T08:50:21Z",
+          "avg_ns": 265968179655,
+          "stddev_ns": 4304066,
+          "avg_ts": 1.925042,
+          "stddev_ts": 3.1e-05,
+          "samples_ns": [
+            265970803440,
+            265970523142,
+            265963212383
+          ],
+          "samples_ts": [
+            1.92502,
+            1.92503,
+            1.92508
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1173
+    },
+    {
+      "timestamp_utc": "2025-12-11T09:20:29.658199+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:03:40Z\",\n    \"avg_ns\": 202606680933,\n    \"stddev_ns\": 1898490,\n    \"avg_ts\": 2.527064,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 202608485428, 202606758232, 202604799140 ],\n    \"samples_ts\": [ 2.52704, 2.52706, 2.52709 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:17:11Z\",\n    \"avg_ns\": 65944345739,\n    \"stddev_ns\": 7967570,\n    \"avg_ts\": 1.941031,\n    \"stddev_ts\": 0.000234,\n    \"samples_ns\": [ 65953531896, 65940000024, 65939505299 ],\n    \"samples_ts\": [ 1.94076, 1.94116, 1.94117 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:03:40Z",
+          "avg_ns": 202606680933,
+          "stddev_ns": 1898490,
+          "avg_ts": 2.527064,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            202608485428,
+            202606758232,
+            202604799140
+          ],
+          "samples_ts": [
+            2.52704,
+            2.52706,
+            2.52709
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:17:11Z",
+          "avg_ns": 65944345739,
+          "stddev_ns": 7967570,
+          "avg_ts": 1.941031,
+          "stddev_ts": 0.000234,
+          "samples_ns": [
+            65953531896,
+            65940000024,
+            65939505299
+          ],
+          "samples_ts": [
+            1.94076,
+            1.94116,
+            1.94117
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1174
+    },
+    {
+      "timestamp_utc": "2025-12-11T09:47:18.564455+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:20:30Z\",\n    \"avg_ns\": 202542807547,\n    \"stddev_ns\": 2608543,\n    \"avg_ts\": 2.527861,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 202543868378, 202539835723, 202544718540 ],\n    \"samples_ts\": [ 2.52785, 2.5279, 2.52784 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:34:01Z\",\n    \"avg_ns\": 265547333729,\n    \"stddev_ns\": 1430433,\n    \"avg_ts\": 1.928093,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 265548627292, 265547196916, 265546176981 ],\n    \"samples_ts\": [ 1.92808, 1.92809, 1.9281 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:20:30Z",
+          "avg_ns": 202542807547,
+          "stddev_ns": 2608543,
+          "avg_ts": 2.527861,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            202543868378,
+            202539835723,
+            202544718540
+          ],
+          "samples_ts": [
+            2.52785,
+            2.5279,
+            2.52784
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:34:01Z",
+          "avg_ns": 265547333729,
+          "stddev_ns": 1430433,
+          "avg_ts": 1.928093,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            265548627292,
+            265547196916,
+            265546176981
+          ],
+          "samples_ts": [
+            1.92808,
+            1.92809,
+            1.9281
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1175
+    },
+    {
+      "timestamp_utc": "2025-12-11T09:53:58.962147+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:47:19Z\",\n    \"avg_ns\": 50242758154,\n    \"stddev_ns\": 598605,\n    \"avg_ts\": 2.547631,\n    \"stddev_ts\": 0.000030,\n    \"samples_ns\": [ 50242398316, 50243449166, 50242426980 ],\n    \"samples_ts\": [ 2.54765, 2.5476, 2.54765 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:50:40Z\",\n    \"avg_ns\": 65892551227,\n    \"stddev_ns\": 3248001,\n    \"avg_ts\": 1.942556,\n    \"stddev_ts\": 0.000095,\n    \"samples_ns\": [ 65896139725, 65891628612, 65889885346 ],\n    \"samples_ts\": [ 1.94245, 1.94258, 1.94264 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:47:19Z",
+          "avg_ns": 50242758154,
+          "stddev_ns": 598605,
+          "avg_ts": 2.547631,
+          "stddev_ts": 3e-05,
+          "samples_ns": [
+            50242398316,
+            50243449166,
+            50242426980
+          ],
+          "samples_ts": [
+            2.54765,
+            2.5476,
+            2.54765
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:50:40Z",
+          "avg_ns": 65892551227,
+          "stddev_ns": 3248001,
+          "avg_ts": 1.942556,
+          "stddev_ts": 9.5e-05,
+          "samples_ns": [
+            65896139725,
+            65891628612,
+            65889885346
+          ],
+          "samples_ts": [
+            1.94245,
+            1.94258,
+            1.94264
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1176
+    },
+    {
+      "timestamp_utc": "2025-12-11T10:10:39.294956+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:54:00Z\",\n    \"avg_ns\": 50246902613,\n    \"stddev_ns\": 389222,\n    \"avg_ts\": 2.547421,\n    \"stddev_ts\": 0.000016,\n    \"samples_ns\": [ 50247222092, 50246585721, 50246900027 ],\n    \"samples_ts\": [ 2.5474, 2.54744, 2.54742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T09:57:21Z\",\n    \"avg_ns\": 265871770831,\n    \"stddev_ns\": 3160896,\n    \"avg_ts\": 1.925740,\n    \"stddev_ts\": 0.000022,\n    \"samples_ns\": [ 265875166113, 265870975172, 265869171210 ],\n    \"samples_ts\": [ 1.92572, 1.92575, 1.92576 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:54:00Z",
+          "avg_ns": 50246902613,
+          "stddev_ns": 389222,
+          "avg_ts": 2.547421,
+          "stddev_ts": 1.6e-05,
+          "samples_ns": [
+            50247222092,
+            50246585721,
+            50246900027
+          ],
+          "samples_ts": [
+            2.5474,
+            2.54744,
+            2.54742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T09:57:21Z",
+          "avg_ns": 265871770831,
+          "stddev_ns": 3160896,
+          "avg_ts": 1.92574,
+          "stddev_ts": 2.2e-05,
+          "samples_ns": [
+            265875166113,
+            265870975172,
+            265869171210
+          ],
+          "samples_ts": [
+            1.92572,
+            1.92575,
+            1.92576
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1177
+    },
+    {
+      "timestamp_utc": "2025-12-11T10:27:24.392865+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:10:40Z\",\n    \"avg_ns\": 201506255209,\n    \"stddev_ns\": 968297,\n    \"avg_ts\": 2.540864,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 201505138193, 201506771223, 201506856211 ],\n    \"samples_ts\": [ 2.54088, 2.54086, 2.54086 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:24:06Z\",\n    \"avg_ns\": 65785331567,\n    \"stddev_ns\": 5108680,\n    \"avg_ts\": 1.945723,\n    \"stddev_ts\": 0.000151,\n    \"samples_ns\": [ 65791215353, 65782336325, 65782443025 ],\n    \"samples_ts\": [ 1.94555, 1.94581, 1.94581 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:10:40Z",
+          "avg_ns": 201506255209,
+          "stddev_ns": 968297,
+          "avg_ts": 2.540864,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            201505138193,
+            201506771223,
+            201506856211
+          ],
+          "samples_ts": [
+            2.54088,
+            2.54086,
+            2.54086
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:24:06Z",
+          "avg_ns": 65785331567,
+          "stddev_ns": 5108680,
+          "avg_ts": 1.945723,
+          "stddev_ts": 0.000151,
+          "samples_ns": [
+            65791215353,
+            65782336325,
+            65782443025
+          ],
+          "samples_ts": [
+            1.94555,
+            1.94581,
+            1.94581
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1178
+    },
+    {
+      "timestamp_utc": "2025-12-11T10:54:10.189126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:27:25Z\",\n    \"avg_ns\": 201537510232,\n    \"stddev_ns\": 1283323,\n    \"avg_ts\": 2.540470,\n    \"stddev_ts\": 0.000014,\n    \"samples_ns\": [ 201538600293, 201537559063, 201536371342 ],\n    \"samples_ts\": [ 2.54046, 2.54047, 2.54048 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:40:51Z\",\n    \"avg_ns\": 265972649749,\n    \"stddev_ns\": 4582885,\n    \"avg_ts\": 1.925010,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 265977932902, 265970270963, 265969745382 ],\n    \"samples_ts\": [ 1.92497, 1.92503, 1.92503 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:27:25Z",
+          "avg_ns": 201537510232,
+          "stddev_ns": 1283323,
+          "avg_ts": 2.54047,
+          "stddev_ts": 1.4e-05,
+          "samples_ns": [
+            201538600293,
+            201537559063,
+            201536371342
+          ],
+          "samples_ts": [
+            2.54046,
+            2.54047,
+            2.54048
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:40:51Z",
+          "avg_ns": 265972649749,
+          "stddev_ns": 4582885,
+          "avg_ts": 1.92501,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            265977932902,
+            265970270963,
+            265969745382
+          ],
+          "samples_ts": [
+            1.92497,
+            1.92503,
+            1.92503
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1179
+    },
+    {
+      "timestamp_utc": "2025-12-11T11:00:50.710300+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:54:11Z\",\n    \"avg_ns\": 50255108900,\n    \"stddev_ns\": 592891,\n    \"avg_ts\": 2.547005,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 50255050375, 50254639725, 50255636602 ],\n    \"samples_ts\": [ 2.54701, 2.54703, 2.54698 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T10:57:32Z\",\n    \"avg_ns\": 65933332412,\n    \"stddev_ns\": 615449,\n    \"avg_ts\": 1.941355,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 65933988210, 65932767394, 65933241632 ],\n    \"samples_ts\": [ 1.94134, 1.94137, 1.94136 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:54:11Z",
+          "avg_ns": 50255108900,
+          "stddev_ns": 592891,
+          "avg_ts": 2.547005,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            50255050375,
+            50254639725,
+            50255636602
+          ],
+          "samples_ts": [
+            2.54701,
+            2.54703,
+            2.54698
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T10:57:32Z",
+          "avg_ns": 65933332412,
+          "stddev_ns": 615449,
+          "avg_ts": 1.941355,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            65933988210,
+            65932767394,
+            65933241632
+          ],
+          "samples_ts": [
+            1.94134,
+            1.94137,
+            1.94136
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1180
+    },
+    {
+      "timestamp_utc": "2025-12-11T11:17:30.192325+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:00:51Z\",\n    \"avg_ns\": 50245197810,\n    \"stddev_ns\": 332289,\n    \"avg_ts\": 2.547507,\n    \"stddev_ts\": 0.000017,\n    \"samples_ns\": [ 50244874508, 50245538411, 50245180511 ],\n    \"samples_ts\": [ 2.54752, 2.54749, 2.54751 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:04:12Z\",\n    \"avg_ns\": 265594055012,\n    \"stddev_ns\": 6934398,\n    \"avg_ts\": 1.927754,\n    \"stddev_ts\": 0.000050,\n    \"samples_ns\": [ 265587779250, 265592948392, 265601437396 ],\n    \"samples_ts\": [ 1.9278, 1.92776, 1.9277 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:00:51Z",
+          "avg_ns": 50245197810,
+          "stddev_ns": 332289,
+          "avg_ts": 2.547507,
+          "stddev_ts": 1.7e-05,
+          "samples_ns": [
+            50244874508,
+            50245538411,
+            50245180511
+          ],
+          "samples_ts": [
+            2.54752,
+            2.54749,
+            2.54751
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:04:12Z",
+          "avg_ns": 265594055012,
+          "stddev_ns": 6934398,
+          "avg_ts": 1.927754,
+          "stddev_ts": 5e-05,
+          "samples_ns": [
+            265587779250,
+            265592948392,
+            265601437396
+          ],
+          "samples_ts": [
+            1.9278,
+            1.92776,
+            1.9277
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1181
+    },
+    {
+      "timestamp_utc": "2025-12-11T11:34:16.250132+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:17:31Z\",\n    \"avg_ns\": 201598526655,\n    \"stddev_ns\": 1906628,\n    \"avg_ts\": 2.539701,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 201599253286, 201599847395, 201596479286 ],\n    \"samples_ts\": [ 2.53969, 2.53968, 2.53973 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:30:57Z\",\n    \"avg_ns\": 65968963041,\n    \"stddev_ns\": 2583915,\n    \"avg_ts\": 1.940306,\n    \"stddev_ts\": 0.000076,\n    \"samples_ns\": [ 65971602759, 65968847563, 65966438801 ],\n    \"samples_ts\": [ 1.94023, 1.94031, 1.94038 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:17:31Z",
+          "avg_ns": 201598526655,
+          "stddev_ns": 1906628,
+          "avg_ts": 2.539701,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            201599253286,
+            201599847395,
+            201596479286
+          ],
+          "samples_ts": [
+            2.53969,
+            2.53968,
+            2.53973
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:30:57Z",
+          "avg_ns": 65968963041,
+          "stddev_ns": 2583915,
+          "avg_ts": 1.940306,
+          "stddev_ts": 7.6e-05,
+          "samples_ns": [
+            65971602759,
+            65968847563,
+            65966438801
+          ],
+          "samples_ts": [
+            1.94023,
+            1.94031,
+            1.94038
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1182
+    },
+    {
+      "timestamp_utc": "2025-12-11T12:01:02.386052+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:34:17Z\",\n    \"avg_ns\": 201574385813,\n    \"stddev_ns\": 2060041,\n    \"avg_ts\": 2.540005,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 201576622500, 201573565042, 201572969899 ],\n    \"samples_ts\": [ 2.53998, 2.54002, 2.54002 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T11:47:43Z\",\n    \"avg_ns\": 266024413977,\n    \"stddev_ns\": 6710998,\n    \"avg_ts\": 1.924635,\n    \"stddev_ts\": 0.000048,\n    \"samples_ns\": [ 266016977903, 266026390163, 266029873867 ],\n    \"samples_ts\": [ 1.92469, 1.92462, 1.9246 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:34:17Z",
+          "avg_ns": 201574385813,
+          "stddev_ns": 2060041,
+          "avg_ts": 2.540005,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            201576622500,
+            201573565042,
+            201572969899
+          ],
+          "samples_ts": [
+            2.53998,
+            2.54002,
+            2.54002
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T11:47:43Z",
+          "avg_ns": 266024413977,
+          "stddev_ns": 6710998,
+          "avg_ts": 1.924635,
+          "stddev_ts": 4.8e-05,
+          "samples_ns": [
+            266016977903,
+            266026390163,
+            266029873867
+          ],
+          "samples_ts": [
+            1.92469,
+            1.92462,
+            1.9246
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1183
+    },
+    {
+      "timestamp_utc": "2025-12-11T12:07:42.978758+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:01:03Z\",\n    \"avg_ns\": 50253315586,\n    \"stddev_ns\": 143590,\n    \"avg_ts\": 2.547096,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 50253459144, 50253171964, 50253315650 ],\n    \"samples_ts\": [ 2.54709, 2.5471, 2.5471 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:04:24Z\",\n    \"avg_ns\": 65943483753,\n    \"stddev_ns\": 1002828,\n    \"avg_ts\": 1.941056,\n    \"stddev_ts\": 0.000030,\n    \"samples_ns\": [ 65944641609, 65942918658, 65942890992 ],\n    \"samples_ts\": [ 1.94102, 1.94107, 1.94107 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:01:03Z",
+          "avg_ns": 50253315586,
+          "stddev_ns": 143590,
+          "avg_ts": 2.547096,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            50253459144,
+            50253171964,
+            50253315650
+          ],
+          "samples_ts": [
+            2.54709,
+            2.5471,
+            2.5471
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:04:24Z",
+          "avg_ns": 65943483753,
+          "stddev_ns": 1002828,
+          "avg_ts": 1.941056,
+          "stddev_ts": 3e-05,
+          "samples_ns": [
+            65944641609,
+            65942918658,
+            65942890992
+          ],
+          "samples_ts": [
+            1.94102,
+            1.94107,
+            1.94107
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1184
+    },
+    {
+      "timestamp_utc": "2025-12-11T12:24:24.102642+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:07:44Z\",\n    \"avg_ns\": 50398138482,\n    \"stddev_ns\": 247411620,\n    \"avg_ts\": 2.539817,\n    \"stddev_ts\": 0.012433,\n    \"samples_ns\": [ 50683823310, 50256033550, 50254558588 ],\n    \"samples_ts\": [ 2.52546, 2.54696, 2.54703 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:11:06Z\",\n    \"avg_ns\": 265594389779,\n    \"stddev_ns\": 3435133,\n    \"avg_ts\": 1.927752,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 265597121284, 265595405858, 265590642197 ],\n    \"samples_ts\": [ 1.92773, 1.92774, 1.92778 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:07:44Z",
+          "avg_ns": 50398138482,
+          "stddev_ns": 247411620,
+          "avg_ts": 2.539817,
+          "stddev_ts": 0.012433,
+          "samples_ns": [
+            50683823310,
+            50256033550,
+            50254558588
+          ],
+          "samples_ts": [
+            2.52546,
+            2.54696,
+            2.54703
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:11:06Z",
+          "avg_ns": 265594389779,
+          "stddev_ns": 3435133,
+          "avg_ts": 1.927752,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            265597121284,
+            265595405858,
+            265590642197
+          ],
+          "samples_ts": [
+            1.92773,
+            1.92774,
+            1.92778
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1185
+    },
+    {
+      "timestamp_utc": "2025-12-11T12:41:13.819607+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:24:25Z\",\n    \"avg_ns\": 202543723528,\n    \"stddev_ns\": 1883672,\n    \"avg_ts\": 2.527849,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 202544308900, 202541616668, 202545245016 ],\n    \"samples_ts\": [ 2.52784, 2.52788, 2.52783 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:37:55Z\",\n    \"avg_ns\": 65938675055,\n    \"stddev_ns\": 2192902,\n    \"avg_ts\": 1.941198,\n    \"stddev_ts\": 0.000065,\n    \"samples_ns\": [ 65940538913, 65939227493, 65936258759 ],\n    \"samples_ts\": [ 1.94114, 1.94118, 1.94127 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:24:25Z",
+          "avg_ns": 202543723528,
+          "stddev_ns": 1883672,
+          "avg_ts": 2.527849,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            202544308900,
+            202541616668,
+            202545245016
+          ],
+          "samples_ts": [
+            2.52784,
+            2.52788,
+            2.52783
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:37:55Z",
+          "avg_ns": 65938675055,
+          "stddev_ns": 2192902,
+          "avg_ts": 1.941198,
+          "stddev_ts": 6.5e-05,
+          "samples_ns": [
+            65940538913,
+            65939227493,
+            65936258759
+          ],
+          "samples_ts": [
+            1.94114,
+            1.94118,
+            1.94127
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1186
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:08:05.344814+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:41:14Z\",\n    \"avg_ns\": 202564165257,\n    \"stddev_ns\": 1326527,\n    \"avg_ts\": 2.527594,\n    \"stddev_ts\": 0.000016,\n    \"samples_ns\": [ 202565476280, 202562992033, 202564027459 ],\n    \"samples_ts\": [ 2.52758, 2.52761, 2.5276 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T12:54:45Z\",\n    \"avg_ns\": 266503174740,\n    \"stddev_ns\": 7206588,\n    \"avg_ts\": 1.921178,\n    \"stddev_ts\": 0.000052,\n    \"samples_ns\": [ 266506335000, 266508221185, 266494968037 ],\n    \"samples_ts\": [ 1.92116, 1.92114, 1.92124 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:41:14Z",
+          "avg_ns": 202564165257,
+          "stddev_ns": 1326527,
+          "avg_ts": 2.527594,
+          "stddev_ts": 1.6e-05,
+          "samples_ns": [
+            202565476280,
+            202562992033,
+            202564027459
+          ],
+          "samples_ts": [
+            2.52758,
+            2.52761,
+            2.5276
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T12:54:45Z",
+          "avg_ns": 266503174740,
+          "stddev_ns": 7206588,
+          "avg_ts": 1.921178,
+          "stddev_ts": 5.2e-05,
+          "samples_ns": [
+            266506335000,
+            266508221185,
+            266494968037
+          ],
+          "samples_ts": [
+            1.92116,
+            1.92114,
+            1.92124
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1187
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:11:31.377883+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:08:06Z\",\n    \"avg_ns\": 25177183074,\n    \"stddev_ns\": 1145310,\n    \"avg_ts\": 5.083968,\n    \"stddev_ts\": 0.000229,\n    \"samples_ns\": [ 25178421381, 25176933389, 25176194453 ],\n    \"samples_ts\": [ 5.08372, 5.08402, 5.08417 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:09:47Z\",\n    \"avg_ns\": 34620345446,\n    \"stddev_ns\": 1800559,\n    \"avg_ts\": 3.697248,\n    \"stddev_ts\": 0.000190,\n    \"samples_ns\": [ 34622274330, 34618762677, 34619999333 ],\n    \"samples_ts\": [ 3.69704, 3.69742, 3.69728 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:08:06Z",
+          "avg_ns": 25177183074,
+          "stddev_ns": 1145310,
+          "avg_ts": 5.083968,
+          "stddev_ts": 0.000229,
+          "samples_ns": [
+            25178421381,
+            25176933389,
+            25176194453
+          ],
+          "samples_ts": [
+            5.08372,
+            5.08402,
+            5.08417
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:09:47Z",
+          "avg_ns": 34620345446,
+          "stddev_ns": 1800559,
+          "avg_ts": 3.697248,
+          "stddev_ts": 0.00019,
+          "samples_ns": [
+            34622274330,
+            34618762677,
+            34619999333
+          ],
+          "samples_ts": [
+            3.69704,
+            3.69742,
+            3.69728
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1188
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:20:12.669707+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:11:32Z\",\n    \"avg_ns\": 25179584502,\n    \"stddev_ns\": 1420373,\n    \"avg_ts\": 5.083483,\n    \"stddev_ts\": 0.000283,\n    \"samples_ns\": [ 25181035027, 25179483009, 25178235472 ],\n    \"samples_ts\": [ 5.08319, 5.0835, 5.08376 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:13:13Z\",\n    \"avg_ns\": 139701298865,\n    \"stddev_ns\": 1669240,\n    \"avg_ts\": 3.664962,\n    \"stddev_ts\": 0.000044,\n    \"samples_ns\": [ 139701249102, 139702992431, 139699655062 ],\n    \"samples_ts\": [ 3.66496, 3.66492, 3.66501 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:11:32Z",
+          "avg_ns": 25179584502,
+          "stddev_ns": 1420373,
+          "avg_ts": 5.083483,
+          "stddev_ts": 0.000283,
+          "samples_ns": [
+            25181035027,
+            25179483009,
+            25178235472
+          ],
+          "samples_ts": [
+            5.08319,
+            5.0835,
+            5.08376
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:13:13Z",
+          "avg_ns": 139701298865,
+          "stddev_ns": 1669240,
+          "avg_ts": 3.664962,
+          "stddev_ts": 4.4e-05,
+          "samples_ns": [
+            139701249102,
+            139702992431,
+            139699655062
+          ],
+          "samples_ts": [
+            3.66496,
+            3.66492,
+            3.66501
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1189
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:28:41.693173+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:20:13Z\",\n    \"avg_ns\": 100964963123,\n    \"stddev_ns\": 1181267,\n    \"avg_ts\": 5.071066,\n    \"stddev_ts\": 0.000055,\n    \"samples_ns\": [ 100966215428, 100964468364, 100964205579 ],\n    \"samples_ts\": [ 5.071, 5.07109, 5.0711 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:26:57Z\",\n    \"avg_ns\": 34557842725,\n    \"stddev_ns\": 9148985,\n    \"avg_ts\": 3.703935,\n    \"stddev_ts\": 0.000980,\n    \"samples_ns\": [ 34567047094, 34557723232, 34548757851 ],\n    \"samples_ts\": [ 3.70295, 3.70395, 3.70491 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:20:13Z",
+          "avg_ns": 100964963123,
+          "stddev_ns": 1181267,
+          "avg_ts": 5.071066,
+          "stddev_ts": 5.5e-05,
+          "samples_ns": [
+            100966215428,
+            100964468364,
+            100964205579
+          ],
+          "samples_ts": [
+            5.071,
+            5.07109,
+            5.0711
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:26:57Z",
+          "avg_ns": 34557842725,
+          "stddev_ns": 9148985,
+          "avg_ts": 3.703935,
+          "stddev_ts": 0.00098,
+          "samples_ns": [
+            34567047094,
+            34557723232,
+            34548757851
+          ],
+          "samples_ts": [
+            3.70295,
+            3.70395,
+            3.70491
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1190
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:42:25.578279+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:28:42Z\",\n    \"avg_ns\": 100976598238,\n    \"stddev_ns\": 2169073,\n    \"avg_ts\": 5.070482,\n    \"stddev_ts\": 0.000109,\n    \"samples_ns\": [ 100974983081, 100975748008, 100979063625 ],\n    \"samples_ts\": [ 5.07056, 5.07052, 5.07036 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:35:26Z\",\n    \"avg_ns\": 139490799500,\n    \"stddev_ns\": 1902020,\n    \"avg_ts\": 3.670493,\n    \"stddev_ts\": 0.000048,\n    \"samples_ns\": [ 139492886884, 139490021933, 139489489685 ],\n    \"samples_ts\": [ 3.67044, 3.67051, 3.67053 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:28:42Z",
+          "avg_ns": 100976598238,
+          "stddev_ns": 2169073,
+          "avg_ts": 5.070482,
+          "stddev_ts": 0.000109,
+          "samples_ns": [
+            100974983081,
+            100975748008,
+            100979063625
+          ],
+          "samples_ts": [
+            5.07056,
+            5.07052,
+            5.07036
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:35:26Z",
+          "avg_ns": 139490799500,
+          "stddev_ns": 1902020,
+          "avg_ts": 3.670493,
+          "stddev_ts": 4.8e-05,
+          "samples_ns": [
+            139492886884,
+            139490021933,
+            139489489685
+          ],
+          "samples_ts": [
+            3.67044,
+            3.67051,
+            3.67053
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1191
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:45:51.395970+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:42:26Z\",\n    \"avg_ns\": 25176536188,\n    \"stddev_ns\": 438182,\n    \"avg_ts\": 5.084099,\n    \"stddev_ts\": 0.000076,\n    \"samples_ns\": [ 25176449522, 25176948327, 25176210717 ],\n    \"samples_ts\": [ 5.08412, 5.08402, 5.08416 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:44:07Z\",\n    \"avg_ns\": 34549358546,\n    \"stddev_ns\": 1634742,\n    \"avg_ts\": 3.704844,\n    \"stddev_ts\": 0.000174,\n    \"samples_ns\": [ 34551223142, 34548252193, 34548600304 ],\n    \"samples_ts\": [ 3.70464, 3.70496, 3.70493 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:42:26Z",
+          "avg_ns": 25176536188,
+          "stddev_ns": 438182,
+          "avg_ts": 5.084099,
+          "stddev_ts": 7.6e-05,
+          "samples_ns": [
+            25176449522,
+            25176948327,
+            25176210717
+          ],
+          "samples_ts": [
+            5.08412,
+            5.08402,
+            5.08416
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:44:07Z",
+          "avg_ns": 34549358546,
+          "stddev_ns": 1634742,
+          "avg_ts": 3.704844,
+          "stddev_ts": 0.000174,
+          "samples_ns": [
+            34551223142,
+            34548252193,
+            34548600304
+          ],
+          "samples_ts": [
+            3.70464,
+            3.70496,
+            3.70493
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1192
+    },
+    {
+      "timestamp_utc": "2025-12-11T13:54:32.838186+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:45:52Z\",\n    \"avg_ns\": 25184910972,\n    \"stddev_ns\": 2740077,\n    \"avg_ts\": 5.082408,\n    \"stddev_ts\": 0.000551,\n    \"samples_ns\": [ 25188016129, 25183833955, 25182882834 ],\n    \"samples_ts\": [ 5.08178, 5.08263, 5.08282 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:47:33Z\",\n    \"avg_ns\": 139742579188,\n    \"stddev_ns\": 4023667,\n    \"avg_ts\": 3.663880,\n    \"stddev_ts\": 0.000105,\n    \"samples_ns\": [ 139738010811, 139745371177, 139744355578 ],\n    \"samples_ts\": [ 3.664, 3.66381, 3.66383 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:45:52Z",
+          "avg_ns": 25184910972,
+          "stddev_ns": 2740077,
+          "avg_ts": 5.082408,
+          "stddev_ts": 0.000551,
+          "samples_ns": [
+            25188016129,
+            25183833955,
+            25182882834
+          ],
+          "samples_ts": [
+            5.08178,
+            5.08263,
+            5.08282
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:47:33Z",
+          "avg_ns": 139742579188,
+          "stddev_ns": 4023667,
+          "avg_ts": 3.66388,
+          "stddev_ts": 0.000105,
+          "samples_ns": [
+            139738010811,
+            139745371177,
+            139744355578
+          ],
+          "samples_ts": [
+            3.664,
+            3.66381,
+            3.66383
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1193
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:03:02.747982+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T13:54:33Z\",\n    \"avg_ns\": 101165111444,\n    \"stddev_ns\": 1048348,\n    \"avg_ts\": 5.061033,\n    \"stddev_ts\": 0.000052,\n    \"samples_ns\": [ 101166319730, 101164571076, 101164443526 ],\n    \"samples_ts\": [ 5.06097, 5.06106, 5.06107 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:01:18Z\",\n    \"avg_ns\": 34588573971,\n    \"stddev_ns\": 4951882,\n    \"avg_ts\": 3.700644,\n    \"stddev_ts\": 0.000530,\n    \"samples_ns\": [ 34594225064, 34586503412, 34584993437 ],\n    \"samples_ts\": [ 3.70004, 3.70087, 3.70103 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T13:54:33Z",
+          "avg_ns": 101165111444,
+          "stddev_ns": 1048348,
+          "avg_ts": 5.061033,
+          "stddev_ts": 5.2e-05,
+          "samples_ns": [
+            101166319730,
+            101164571076,
+            101164443526
+          ],
+          "samples_ts": [
+            5.06097,
+            5.06106,
+            5.06107
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:01:18Z",
+          "avg_ns": 34588573971,
+          "stddev_ns": 4951882,
+          "avg_ts": 3.700644,
+          "stddev_ts": 0.00053,
+          "samples_ns": [
+            34594225064,
+            34586503412,
+            34584993437
+          ],
+          "samples_ts": [
+            3.70004,
+            3.70087,
+            3.70103
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1194
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:16:47.948802+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:03:03Z\",\n    \"avg_ns\": 101157118780,\n    \"stddev_ns\": 768139,\n    \"avg_ts\": 5.061433,\n    \"stddev_ts\": 0.000031,\n    \"samples_ns\": [ 101157776588, 101157041257, 101156538497 ],\n    \"samples_ts\": [ 5.0614, 5.06144, 5.06146 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:09:48Z\",\n    \"avg_ns\": 139691450398,\n    \"stddev_ns\": 5629237,\n    \"avg_ts\": 3.665221,\n    \"stddev_ts\": 0.000148,\n    \"samples_ns\": [ 139697221285, 139691155507, 139685974402 ],\n    \"samples_ts\": [ 3.66507, 3.66523, 3.66536 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:03:03Z",
+          "avg_ns": 101157118780,
+          "stddev_ns": 768139,
+          "avg_ts": 5.061433,
+          "stddev_ts": 3.1e-05,
+          "samples_ns": [
+            101157776588,
+            101157041257,
+            101156538497
+          ],
+          "samples_ts": [
+            5.0614,
+            5.06144,
+            5.06146
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:09:48Z",
+          "avg_ns": 139691450398,
+          "stddev_ns": 5629237,
+          "avg_ts": 3.665221,
+          "stddev_ts": 0.000148,
+          "samples_ns": [
+            139697221285,
+            139691155507,
+            139685974402
+          ],
+          "samples_ts": [
+            3.66507,
+            3.66523,
+            3.66536
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1195
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:20:13.992190+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:16:49Z\",\n    \"avg_ns\": 25177130418,\n    \"stddev_ns\": 655686,\n    \"avg_ts\": 5.083979,\n    \"stddev_ts\": 0.000124,\n    \"samples_ns\": [ 25176459670, 25177671080, 25177260506 ],\n    \"samples_ts\": [ 5.08411, 5.08387, 5.08395 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:18:29Z\",\n    \"avg_ns\": 34623296431,\n    \"stddev_ns\": 1364418,\n    \"avg_ts\": 3.696933,\n    \"stddev_ts\": 0.000143,\n    \"samples_ns\": [ 34624493270, 34621850619, 34623545406 ],\n    \"samples_ts\": [ 3.69681, 3.69709, 3.69691 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:16:49Z",
+          "avg_ns": 25177130418,
+          "stddev_ns": 655686,
+          "avg_ts": 5.083979,
+          "stddev_ts": 0.000124,
+          "samples_ns": [
+            25176459670,
+            25177671080,
+            25177260506
+          ],
+          "samples_ts": [
+            5.08411,
+            5.08387,
+            5.08395
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:18:29Z",
+          "avg_ns": 34623296431,
+          "stddev_ns": 1364418,
+          "avg_ts": 3.696933,
+          "stddev_ts": 0.000143,
+          "samples_ns": [
+            34624493270,
+            34621850619,
+            34623545406
+          ],
+          "samples_ts": [
+            3.69681,
+            3.69709,
+            3.69691
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1196
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:28:55.507481+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:20:15Z\",\n    \"avg_ns\": 25187716979,\n    \"stddev_ns\": 1788129,\n    \"avg_ts\": 5.081842,\n    \"stddev_ts\": 0.000358,\n    \"samples_ns\": [ 25186869800, 25186525408, 25189755731 ],\n    \"samples_ts\": [ 5.08201, 5.08208, 5.08143 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:21:55Z\",\n    \"avg_ns\": 139762560010,\n    \"stddev_ns\": 3262081,\n    \"avg_ts\": 3.663356,\n    \"stddev_ts\": 0.000085,\n    \"samples_ns\": [ 139759120613, 139763003259, 139765556159 ],\n    \"samples_ts\": [ 3.66345, 3.66334, 3.66328 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:20:15Z",
+          "avg_ns": 25187716979,
+          "stddev_ns": 1788129,
+          "avg_ts": 5.081842,
+          "stddev_ts": 0.000358,
+          "samples_ns": [
+            25186869800,
+            25186525408,
+            25189755731
+          ],
+          "samples_ts": [
+            5.08201,
+            5.08208,
+            5.08143
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:21:55Z",
+          "avg_ns": 139762560010,
+          "stddev_ns": 3262081,
+          "avg_ts": 3.663356,
+          "stddev_ts": 8.5e-05,
+          "samples_ns": [
+            139759120613,
+            139763003259,
+            139765556159
+          ],
+          "samples_ts": [
+            3.66345,
+            3.66334,
+            3.66328
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1197
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:37:29.014806+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:28:56Z\",\n    \"avg_ns\": 101839325667,\n    \"stddev_ns\": 8068062,\n    \"avg_ts\": 5.027527,\n    \"stddev_ts\": 0.000398,\n    \"samples_ns\": [ 101836705344, 101848371604, 101832900054 ],\n    \"samples_ts\": [ 5.02766, 5.02708, 5.02784 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:35:45Z\",\n    \"avg_ns\": 34526202595,\n    \"stddev_ns\": 1663470,\n    \"avg_ts\": 3.707329,\n    \"stddev_ts\": 0.000177,\n    \"samples_ns\": [ 34528097691, 34525452701, 34525057394 ],\n    \"samples_ts\": [ 3.70713, 3.70741, 3.70745 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:28:56Z",
+          "avg_ns": 101839325667,
+          "stddev_ns": 8068062,
+          "avg_ts": 5.027527,
+          "stddev_ts": 0.000398,
+          "samples_ns": [
+            101836705344,
+            101848371604,
+            101832900054
+          ],
+          "samples_ts": [
+            5.02766,
+            5.02708,
+            5.02784
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:35:45Z",
+          "avg_ns": 34526202595,
+          "stddev_ns": 1663470,
+          "avg_ts": 3.707329,
+          "stddev_ts": 0.000177,
+          "samples_ns": [
+            34528097691,
+            34525452701,
+            34525057394
+          ],
+          "samples_ts": [
+            3.70713,
+            3.70741,
+            3.70745
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1198
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:51:16.063512+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:37:30Z\",\n    \"avg_ns\": 101817202735,\n    \"stddev_ns\": 4309784,\n    \"avg_ts\": 5.028620,\n    \"stddev_ts\": 0.000212,\n    \"samples_ns\": [ 101820527167, 101812365347, 101818715693 ],\n    \"samples_ts\": [ 5.02846, 5.02886, 5.02855 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:44:17Z\",\n    \"avg_ns\": 139427105852,\n    \"stddev_ns\": 7209505,\n    \"avg_ts\": 3.672170,\n    \"stddev_ts\": 0.000189,\n    \"samples_ns\": [ 139435391160, 139422501265, 139423425133 ],\n    \"samples_ts\": [ 3.67195, 3.67229, 3.67227 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:37:30Z",
+          "avg_ns": 101817202735,
+          "stddev_ns": 4309784,
+          "avg_ts": 5.02862,
+          "stddev_ts": 0.000212,
+          "samples_ns": [
+            101820527167,
+            101812365347,
+            101818715693
+          ],
+          "samples_ts": [
+            5.02846,
+            5.02886,
+            5.02855
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:44:17Z",
+          "avg_ns": 139427105852,
+          "stddev_ns": 7209505,
+          "avg_ts": 3.67217,
+          "stddev_ts": 0.000189,
+          "samples_ns": [
+            139435391160,
+            139422501265,
+            139423425133
+          ],
+          "samples_ts": [
+            3.67195,
+            3.67229,
+            3.67227
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1199
+    },
+    {
+      "timestamp_utc": "2025-12-11T14:54:42.777208+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:51:17Z\",\n    \"avg_ns\": 25285639800,\n    \"stddev_ns\": 166996390,\n    \"avg_ts\": 5.062308,\n    \"stddev_ts\": 0.033307,\n    \"samples_ns\": [ 25478468555, 25189998738, 25188452107 ],\n    \"samples_ts\": [ 5.02385, 5.08138, 5.08169 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:52:58Z\",\n    \"avg_ns\": 34595128864,\n    \"stddev_ns\": 9296389,\n    \"avg_ts\": 3.699943,\n    \"stddev_ts\": 0.000994,\n    \"samples_ns\": [ 34584411271, 34600975673, 34599999649 ],\n    \"samples_ts\": [ 3.70109, 3.69932, 3.69942 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:51:17Z",
+          "avg_ns": 25285639800,
+          "stddev_ns": 166996390,
+          "avg_ts": 5.062308,
+          "stddev_ts": 0.033307,
+          "samples_ns": [
+            25478468555,
+            25189998738,
+            25188452107
+          ],
+          "samples_ts": [
+            5.02385,
+            5.08138,
+            5.08169
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:52:58Z",
+          "avg_ns": 34595128864,
+          "stddev_ns": 9296389,
+          "avg_ts": 3.699943,
+          "stddev_ts": 0.000994,
+          "samples_ns": [
+            34584411271,
+            34600975673,
+            34599999649
+          ],
+          "samples_ts": [
+            3.70109,
+            3.69932,
+            3.69942
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1200
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:03:24.634823+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:54:43Z\",\n    \"avg_ns\": 25478192822,\n    \"stddev_ns\": 3003324,\n    \"avg_ts\": 5.023904,\n    \"stddev_ts\": 0.000590,\n    \"samples_ns\": [ 25481638967, 25476718884, 25476220617 ],\n    \"samples_ts\": [ 5.02322, 5.02419, 5.02429 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T14:56:25Z\",\n    \"avg_ns\": 139476535940,\n    \"stddev_ns\": 68650498,\n    \"avg_ts\": 3.670869,\n    \"stddev_ts\": 0.001806,\n    \"samples_ns\": [ 139555434308, 139430450710, 139443722803 ],\n    \"samples_ts\": [ 3.66879, 3.67208, 3.67173 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:54:43Z",
+          "avg_ns": 25478192822,
+          "stddev_ns": 3003324,
+          "avg_ts": 5.023904,
+          "stddev_ts": 0.00059,
+          "samples_ns": [
+            25481638967,
+            25476718884,
+            25476220617
+          ],
+          "samples_ts": [
+            5.02322,
+            5.02419,
+            5.02429
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T14:56:25Z",
+          "avg_ns": 139476535940,
+          "stddev_ns": 68650498,
+          "avg_ts": 3.670869,
+          "stddev_ts": 0.001806,
+          "samples_ns": [
+            139555434308,
+            139430450710,
+            139443722803
+          ],
+          "samples_ts": [
+            3.66879,
+            3.67208,
+            3.67173
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1201
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:11:54.014247+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:03:25Z\",\n    \"avg_ns\": 101011451633,\n    \"stddev_ns\": 3788497,\n    \"avg_ts\": 5.068732,\n    \"stddev_ts\": 0.000189,\n    \"samples_ns\": [ 101015103392, 101011662709, 101007588800 ],\n    \"samples_ts\": [ 5.06855, 5.06872, 5.06893 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:10:09Z\",\n    \"avg_ns\": 34613902154,\n    \"stddev_ns\": 4565722,\n    \"avg_ts\": 3.697936,\n    \"stddev_ts\": 0.000487,\n    \"samples_ns\": [ 34617805797, 34615013402, 34608887264 ],\n    \"samples_ts\": [ 3.69752, 3.69782, 3.69847 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:03:25Z",
+          "avg_ns": 101011451633,
+          "stddev_ns": 3788497,
+          "avg_ts": 5.068732,
+          "stddev_ts": 0.000189,
+          "samples_ns": [
+            101015103392,
+            101011662709,
+            101007588800
+          ],
+          "samples_ts": [
+            5.06855,
+            5.06872,
+            5.06893
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:10:09Z",
+          "avg_ns": 34613902154,
+          "stddev_ns": 4565722,
+          "avg_ts": 3.697936,
+          "stddev_ts": 0.000487,
+          "samples_ns": [
+            34617805797,
+            34615013402,
+            34608887264
+          ],
+          "samples_ts": [
+            3.69752,
+            3.69782,
+            3.69847
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1202
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:25:38.371290+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:11:55Z\",\n    \"avg_ns\": 100999966595,\n    \"stddev_ns\": 1250041,\n    \"avg_ts\": 5.069309,\n    \"stddev_ts\": 0.000059,\n    \"samples_ns\": [ 101001206060, 100999803373, 100998890354 ],\n    \"samples_ts\": [ 5.06925, 5.06932, 5.06936 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:18:39Z\",\n    \"avg_ns\": 139612377773,\n    \"stddev_ns\": 7055220,\n    \"avg_ts\": 3.667297,\n    \"stddev_ts\": 0.000185,\n    \"samples_ns\": [ 139619426119, 139612351820, 139605355382 ],\n    \"samples_ts\": [ 3.66711, 3.6673, 3.66748 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:11:55Z",
+          "avg_ns": 100999966595,
+          "stddev_ns": 1250041,
+          "avg_ts": 5.069309,
+          "stddev_ts": 5.9e-05,
+          "samples_ns": [
+            101001206060,
+            100999803373,
+            100998890354
+          ],
+          "samples_ts": [
+            5.06925,
+            5.06932,
+            5.06936
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:18:39Z",
+          "avg_ns": 139612377773,
+          "stddev_ns": 7055220,
+          "avg_ts": 3.667297,
+          "stddev_ts": 0.000185,
+          "samples_ns": [
+            139619426119,
+            139612351820,
+            139605355382
+          ],
+          "samples_ts": [
+            3.66711,
+            3.6673,
+            3.66748
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1203
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:29:04.283729+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:25:39Z\",\n    \"avg_ns\": 25184254973,\n    \"stddev_ns\": 256858,\n    \"avg_ts\": 5.082541,\n    \"stddev_ts\": 0.000052,\n    \"samples_ns\": [ 25183967470, 25184335613, 25184461836 ],\n    \"samples_ts\": [ 5.0826, 5.08252, 5.0825 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:27:20Z\",\n    \"avg_ns\": 34549274052,\n    \"stddev_ns\": 940562,\n    \"avg_ts\": 3.704854,\n    \"stddev_ts\": 0.000099,\n    \"samples_ns\": [ 34550015292, 34548241593, 34549565272 ],\n    \"samples_ts\": [ 3.70477, 3.70496, 3.70482 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:25:39Z",
+          "avg_ns": 25184254973,
+          "stddev_ns": 256858,
+          "avg_ts": 5.082541,
+          "stddev_ts": 5.2e-05,
+          "samples_ns": [
+            25183967470,
+            25184335613,
+            25184461836
+          ],
+          "samples_ts": [
+            5.0826,
+            5.08252,
+            5.0825
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:27:20Z",
+          "avg_ns": 34549274052,
+          "stddev_ns": 940562,
+          "avg_ts": 3.704854,
+          "stddev_ts": 9.9e-05,
+          "samples_ns": [
+            34550015292,
+            34548241593,
+            34549565272
+          ],
+          "samples_ts": [
+            3.70477,
+            3.70496,
+            3.70482
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1204
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:37:44.789377+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:29:05Z\",\n    \"avg_ns\": 25180844427,\n    \"stddev_ns\": 3011569,\n    \"avg_ts\": 5.083229,\n    \"stddev_ts\": 0.000607,\n    \"samples_ns\": [ 25184290785, 25179490481, 25178752016 ],\n    \"samples_ts\": [ 5.08253, 5.0835, 5.08365 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:30:46Z\",\n    \"avg_ns\": 139421360881,\n    \"stddev_ns\": 5869413,\n    \"avg_ts\": 3.672321,\n    \"stddev_ts\": 0.000154,\n    \"samples_ns\": [ 139414599199, 139424599121, 139424884324 ],\n    \"samples_ts\": [ 3.6725, 3.67224, 3.67223 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:29:05Z",
+          "avg_ns": 25180844427,
+          "stddev_ns": 3011569,
+          "avg_ts": 5.083229,
+          "stddev_ts": 0.000607,
+          "samples_ns": [
+            25184290785,
+            25179490481,
+            25178752016
+          ],
+          "samples_ts": [
+            5.08253,
+            5.0835,
+            5.08365
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:30:46Z",
+          "avg_ns": 139421360881,
+          "stddev_ns": 5869413,
+          "avg_ts": 3.672321,
+          "stddev_ts": 0.000154,
+          "samples_ns": [
+            139414599199,
+            139424599121,
+            139424884324
+          ],
+          "samples_ts": [
+            3.6725,
+            3.67224,
+            3.67223
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1205
+    },
+    {
+      "timestamp_utc": "2025-12-11T15:46:14.809050+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:37:45Z\",\n    \"avg_ns\": 101186851951,\n    \"stddev_ns\": 3121139,\n    \"avg_ts\": 5.059946,\n    \"stddev_ts\": 0.000155,\n    \"samples_ns\": [ 101185420478, 101190414351, 101184721025 ],\n    \"samples_ts\": [ 5.06002, 5.05977, 5.06005 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:44:30Z\",\n    \"avg_ns\": 34581800782,\n    \"stddev_ns\": 1809556,\n    \"avg_ts\": 3.701369,\n    \"stddev_ts\": 0.000193,\n    \"samples_ns\": [ 34583875822, 34580866085, 34580660440 ],\n    \"samples_ts\": [ 3.70115, 3.70147, 3.70149 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:37:45Z",
+          "avg_ns": 101186851951,
+          "stddev_ns": 3121139,
+          "avg_ts": 5.059946,
+          "stddev_ts": 0.000155,
+          "samples_ns": [
+            101185420478,
+            101190414351,
+            101184721025
+          ],
+          "samples_ts": [
+            5.06002,
+            5.05977,
+            5.06005
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:44:30Z",
+          "avg_ns": 34581800782,
+          "stddev_ns": 1809556,
+          "avg_ts": 3.701369,
+          "stddev_ts": 0.000193,
+          "samples_ns": [
+            34583875822,
+            34580866085,
+            34580660440
+          ],
+          "samples_ts": [
+            3.70115,
+            3.70147,
+            3.70149
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1206
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:00:00.180701+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:46:15Z\",\n    \"avg_ns\": 101147581125,\n    \"stddev_ns\": 76736,\n    \"avg_ts\": 5.061910,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 101147639210, 101147610031, 101147494134 ],\n    \"samples_ts\": [ 5.06191, 5.06191, 5.06191 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T15:53:00Z\",\n    \"avg_ns\": 139749786120,\n    \"stddev_ns\": 33659748,\n    \"avg_ts\": 3.663691,\n    \"stddev_ts\": 0.000882,\n    \"samples_ns\": [ 139788647646, 139729794116, 139730916598 ],\n    \"samples_ts\": [ 3.66267, 3.66421, 3.66419 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:46:15Z",
+          "avg_ns": 101147581125,
+          "stddev_ns": 76736,
+          "avg_ts": 5.06191,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            101147639210,
+            101147610031,
+            101147494134
+          ],
+          "samples_ts": [
+            5.06191,
+            5.06191,
+            5.06191
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T15:53:00Z",
+          "avg_ns": 139749786120,
+          "stddev_ns": 33659748,
+          "avg_ts": 3.663691,
+          "stddev_ts": 0.000882,
+          "samples_ns": [
+            139788647646,
+            139729794116,
+            139730916598
+          ],
+          "samples_ts": [
+            3.66267,
+            3.66421,
+            3.66419
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1207
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:03:26.671220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:00:01Z\",\n    \"avg_ns\": 25313580685,\n    \"stddev_ns\": 156187248,\n    \"avg_ts\": 5.056702,\n    \"stddev_ts\": 0.031135,\n    \"samples_ns\": [ 25485266424, 25275564707, 25179910926 ],\n    \"samples_ts\": [ 5.02251, 5.06418, 5.08342 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:01:42Z\",\n    \"avg_ns\": 34561676432,\n    \"stddev_ns\": 1867595,\n    \"avg_ts\": 3.703524,\n    \"stddev_ts\": 0.000200,\n    \"samples_ns\": [ 34563404001, 34559694819, 34561930476 ],\n    \"samples_ts\": [ 3.70334, 3.70374, 3.7035 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:00:01Z",
+          "avg_ns": 25313580685,
+          "stddev_ns": 156187248,
+          "avg_ts": 5.056702,
+          "stddev_ts": 0.031135,
+          "samples_ns": [
+            25485266424,
+            25275564707,
+            25179910926
+          ],
+          "samples_ts": [
+            5.02251,
+            5.06418,
+            5.08342
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:01:42Z",
+          "avg_ns": 34561676432,
+          "stddev_ns": 1867595,
+          "avg_ts": 3.703524,
+          "stddev_ts": 0.0002,
+          "samples_ns": [
+            34563404001,
+            34559694819,
+            34561930476
+          ],
+          "samples_ts": [
+            3.70334,
+            3.70374,
+            3.7035
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1208
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:12:07.317939+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:03:27Z\",\n    \"avg_ns\": 25180343453,\n    \"stddev_ns\": 1210759,\n    \"avg_ts\": 5.083330,\n    \"stddev_ts\": 0.000244,\n    \"samples_ns\": [ 25181159648, 25180918365, 25178952346 ],\n    \"samples_ts\": [ 5.08317, 5.08321, 5.08361 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:05:08Z\",\n    \"avg_ns\": 139451254277,\n    \"stddev_ns\": 5180545,\n    \"avg_ts\": 3.671534,\n    \"stddev_ts\": 0.000136,\n    \"samples_ns\": [ 139457147357, 139447417932, 139449197542 ],\n    \"samples_ts\": [ 3.67138, 3.67163, 3.67159 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:03:27Z",
+          "avg_ns": 25180343453,
+          "stddev_ns": 1210759,
+          "avg_ts": 5.08333,
+          "stddev_ts": 0.000244,
+          "samples_ns": [
+            25181159648,
+            25180918365,
+            25178952346
+          ],
+          "samples_ts": [
+            5.08317,
+            5.08321,
+            5.08361
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:05:08Z",
+          "avg_ns": 139451254277,
+          "stddev_ns": 5180545,
+          "avg_ts": 3.671534,
+          "stddev_ts": 0.000136,
+          "samples_ns": [
+            139457147357,
+            139447417932,
+            139449197542
+          ],
+          "samples_ts": [
+            3.67138,
+            3.67163,
+            3.67159
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1209
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:20:39.768559+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:12:08Z\",\n    \"avg_ns\": 101841375060,\n    \"stddev_ns\": 1779177,\n    \"avg_ts\": 5.027426,\n    \"stddev_ts\": 0.000088,\n    \"samples_ns\": [ 101842825603, 101839389861, 101841909716 ],\n    \"samples_ts\": [ 5.02735, 5.02752, 5.0274 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:18:55Z\",\n    \"avg_ns\": 34501829596,\n    \"stddev_ns\": 3081194,\n    \"avg_ts\": 3.709948,\n    \"stddev_ts\": 0.000331,\n    \"samples_ns\": [ 34505375570, 34499886865, 34500226354 ],\n    \"samples_ts\": [ 3.70957, 3.71016, 3.71012 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:12:08Z",
+          "avg_ns": 101841375060,
+          "stddev_ns": 1779177,
+          "avg_ts": 5.027426,
+          "stddev_ts": 8.8e-05,
+          "samples_ns": [
+            101842825603,
+            101839389861,
+            101841909716
+          ],
+          "samples_ts": [
+            5.02735,
+            5.02752,
+            5.0274
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:18:55Z",
+          "avg_ns": 34501829596,
+          "stddev_ns": 3081194,
+          "avg_ts": 3.709948,
+          "stddev_ts": 0.000331,
+          "samples_ns": [
+            34505375570,
+            34499886865,
+            34500226354
+          ],
+          "samples_ts": [
+            3.70957,
+            3.71016,
+            3.71012
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1210
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:34:27.946674+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:20:40Z\",\n    \"avg_ns\": 101830386556,\n    \"stddev_ns\": 4358729,\n    \"avg_ts\": 5.027969,\n    \"stddev_ts\": 0.000214,\n    \"samples_ns\": [ 101826156149, 101834819648, 101830183873 ],\n    \"samples_ts\": [ 5.02818, 5.02775, 5.02798 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:27:28Z\",\n    \"avg_ns\": 139752937583,\n    \"stddev_ns\": 3224444,\n    \"avg_ts\": 3.663608,\n    \"stddev_ts\": 0.000085,\n    \"samples_ns\": [ 139756288251, 139752668239, 139749856259 ],\n    \"samples_ts\": [ 3.66352, 3.66362, 3.66369 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:20:40Z",
+          "avg_ns": 101830386556,
+          "stddev_ns": 4358729,
+          "avg_ts": 5.027969,
+          "stddev_ts": 0.000214,
+          "samples_ns": [
+            101826156149,
+            101834819648,
+            101830183873
+          ],
+          "samples_ts": [
+            5.02818,
+            5.02775,
+            5.02798
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:27:28Z",
+          "avg_ns": 139752937583,
+          "stddev_ns": 3224444,
+          "avg_ts": 3.663608,
+          "stddev_ts": 8.5e-05,
+          "samples_ns": [
+            139756288251,
+            139752668239,
+            139749856259
+          ],
+          "samples_ts": [
+            3.66352,
+            3.66362,
+            3.66369
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1211
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:37:53.870564+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:34:29Z\",\n    \"avg_ns\": 25182659335,\n    \"stddev_ns\": 1933220,\n    \"avg_ts\": 5.082863,\n    \"stddev_ts\": 0.000389,\n    \"samples_ns\": [ 25181775946, 25184869328, 25181332732 ],\n    \"samples_ts\": [ 5.08304, 5.08242, 5.08313 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:36:09Z\",\n    \"avg_ns\": 34533781009,\n    \"stddev_ns\": 2397639,\n    \"avg_ts\": 3.706516,\n    \"stddev_ts\": 0.000256,\n    \"samples_ns\": [ 34534697968, 34535569528, 34531075533 ],\n    \"samples_ts\": [ 3.70642, 3.70632, 3.70681 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:34:29Z",
+          "avg_ns": 25182659335,
+          "stddev_ns": 1933220,
+          "avg_ts": 5.082863,
+          "stddev_ts": 0.000389,
+          "samples_ns": [
+            25181775946,
+            25184869328,
+            25181332732
+          ],
+          "samples_ts": [
+            5.08304,
+            5.08242,
+            5.08313
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:36:09Z",
+          "avg_ns": 34533781009,
+          "stddev_ns": 2397639,
+          "avg_ts": 3.706516,
+          "stddev_ts": 0.000256,
+          "samples_ns": [
+            34534697968,
+            34535569528,
+            34531075533
+          ],
+          "samples_ts": [
+            3.70642,
+            3.70632,
+            3.70681
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1212
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:46:36.238541+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:37:55Z\",\n    \"avg_ns\": 25188183601,\n    \"stddev_ns\": 2498055,\n    \"avg_ts\": 5.081748,\n    \"stddev_ts\": 0.000503,\n    \"samples_ns\": [ 25190869853, 25187736653, 25185944298 ],\n    \"samples_ts\": [ 5.08121, 5.08184, 5.0822 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:39:35Z\",\n    \"avg_ns\": 139993388034,\n    \"stddev_ns\": 4308136,\n    \"avg_ts\": 3.657316,\n    \"stddev_ts\": 0.000112,\n    \"samples_ns\": [ 139988796886, 139994111647, 139997255571 ],\n    \"samples_ts\": [ 3.65744, 3.6573, 3.65721 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:37:55Z",
+          "avg_ns": 25188183601,
+          "stddev_ns": 2498055,
+          "avg_ts": 5.081748,
+          "stddev_ts": 0.000503,
+          "samples_ns": [
+            25190869853,
+            25187736653,
+            25185944298
+          ],
+          "samples_ts": [
+            5.08121,
+            5.08184,
+            5.0822
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:39:35Z",
+          "avg_ns": 139993388034,
+          "stddev_ns": 4308136,
+          "avg_ts": 3.657316,
+          "stddev_ts": 0.000112,
+          "samples_ns": [
+            139988796886,
+            139994111647,
+            139997255571
+          ],
+          "samples_ts": [
+            3.65744,
+            3.6573,
+            3.65721
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1213
+    },
+    {
+      "timestamp_utc": "2025-12-11T16:55:05.439426+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:46:37Z\",\n    \"avg_ns\": 101007010328,\n    \"stddev_ns\": 4612305,\n    \"avg_ts\": 5.068955,\n    \"stddev_ts\": 0.000231,\n    \"samples_ns\": [ 101008955948, 101010319256, 101001755781 ],\n    \"samples_ts\": [ 5.06886, 5.06879, 5.06922 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:53:21Z\",\n    \"avg_ns\": 34539547222,\n    \"stddev_ns\": 3733434,\n    \"avg_ts\": 3.705897,\n    \"stddev_ts\": 0.000401,\n    \"samples_ns\": [ 34543857331, 34537468004, 34537316331 ],\n    \"samples_ts\": [ 3.70543, 3.70612, 3.70614 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:46:37Z",
+          "avg_ns": 101007010328,
+          "stddev_ns": 4612305,
+          "avg_ts": 5.068955,
+          "stddev_ts": 0.000231,
+          "samples_ns": [
+            101008955948,
+            101010319256,
+            101001755781
+          ],
+          "samples_ts": [
+            5.06886,
+            5.06879,
+            5.06922
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:53:21Z",
+          "avg_ns": 34539547222,
+          "stddev_ns": 3733434,
+          "avg_ts": 3.705897,
+          "stddev_ts": 0.000401,
+          "samples_ns": [
+            34543857331,
+            34537468004,
+            34537316331
+          ],
+          "samples_ts": [
+            3.70543,
+            3.70612,
+            3.70614
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1214
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:08:49.307546+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T16:55:06Z\",\n    \"avg_ns\": 101002167653,\n    \"stddev_ns\": 1704498,\n    \"avg_ts\": 5.069198,\n    \"stddev_ts\": 0.000083,\n    \"samples_ns\": [ 101001143178, 101001295641, 101004064142 ],\n    \"samples_ts\": [ 5.06925, 5.06924, 5.0691 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:01:50Z\",\n    \"avg_ns\": 139417248011,\n    \"stddev_ns\": 6598655,\n    \"avg_ts\": 3.672429,\n    \"stddev_ts\": 0.000173,\n    \"samples_ns\": [ 139421381171, 139409663174, 139420699690 ],\n    \"samples_ts\": [ 3.67232, 3.67263, 3.67234 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T16:55:06Z",
+          "avg_ns": 101002167653,
+          "stddev_ns": 1704498,
+          "avg_ts": 5.069198,
+          "stddev_ts": 8.3e-05,
+          "samples_ns": [
+            101001143178,
+            101001295641,
+            101004064142
+          ],
+          "samples_ts": [
+            5.06925,
+            5.06924,
+            5.0691
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:01:50Z",
+          "avg_ns": 139417248011,
+          "stddev_ns": 6598655,
+          "avg_ts": 3.672429,
+          "stddev_ts": 0.000173,
+          "samples_ns": [
+            139421381171,
+            139409663174,
+            139420699690
+          ],
+          "samples_ts": [
+            3.67232,
+            3.67263,
+            3.67234
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1215
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:12:15.198242+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:08:50Z\",\n    \"avg_ns\": 25179076290,\n    \"stddev_ns\": 478268,\n    \"avg_ts\": 5.083586,\n    \"stddev_ts\": 0.000085,\n    \"samples_ns\": [ 25179532467, 25178698836, 25178997569 ],\n    \"samples_ts\": [ 5.08349, 5.08366, 5.0836 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:10:31Z\",\n    \"avg_ns\": 34536786151,\n    \"stddev_ns\": 2276702,\n    \"avg_ts\": 3.706193,\n    \"stddev_ts\": 0.000244,\n    \"samples_ns\": [ 34539412473, 34535371991, 34535573989 ],\n    \"samples_ts\": [ 3.70591, 3.70634, 3.70632 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:08:50Z",
+          "avg_ns": 25179076290,
+          "stddev_ns": 478268,
+          "avg_ts": 5.083586,
+          "stddev_ts": 8.5e-05,
+          "samples_ns": [
+            25179532467,
+            25178698836,
+            25178997569
+          ],
+          "samples_ts": [
+            5.08349,
+            5.08366,
+            5.0836
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:10:31Z",
+          "avg_ns": 34536786151,
+          "stddev_ns": 2276702,
+          "avg_ts": 3.706193,
+          "stddev_ts": 0.000244,
+          "samples_ns": [
+            34539412473,
+            34535371991,
+            34535573989
+          ],
+          "samples_ts": [
+            3.70591,
+            3.70634,
+            3.70632
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1216
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:20:57.480344+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:12:16Z\",\n    \"avg_ns\": 25188390678,\n    \"stddev_ns\": 386590,\n    \"avg_ts\": 5.081706,\n    \"stddev_ts\": 0.000071,\n    \"samples_ns\": [ 25188705777, 25188009974, 25188456284 ],\n    \"samples_ts\": [ 5.08164, 5.08178, 5.08169 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:13:57Z\",\n    \"avg_ns\": 139976454896,\n    \"stddev_ns\": 8690912,\n    \"avg_ts\": 3.657758,\n    \"stddev_ts\": 0.000227,\n    \"samples_ns\": [ 139984056131, 139978316250, 139966992308 ],\n    \"samples_ts\": [ 3.65756, 3.65771, 3.65801 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:12:16Z",
+          "avg_ns": 25188390678,
+          "stddev_ns": 386590,
+          "avg_ts": 5.081706,
+          "stddev_ts": 7.1e-05,
+          "samples_ns": [
+            25188705777,
+            25188009974,
+            25188456284
+          ],
+          "samples_ts": [
+            5.08164,
+            5.08178,
+            5.08169
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:13:57Z",
+          "avg_ns": 139976454896,
+          "stddev_ns": 8690912,
+          "avg_ts": 3.657758,
+          "stddev_ts": 0.000227,
+          "samples_ns": [
+            139984056131,
+            139978316250,
+            139966992308
+          ],
+          "samples_ts": [
+            3.65756,
+            3.65771,
+            3.65801
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1217
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:29:27.295734+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:20:58Z\",\n    \"avg_ns\": 101149270792,\n    \"stddev_ns\": 2177306,\n    \"avg_ts\": 5.061826,\n    \"stddev_ts\": 0.000107,\n    \"samples_ns\": [ 101151730635, 101148056593, 101148025150 ],\n    \"samples_ts\": [ 5.0617, 5.06189, 5.06189 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:27:43Z\",\n    \"avg_ns\": 34552816449,\n    \"stddev_ns\": 6944458,\n    \"avg_ts\": 3.704474,\n    \"stddev_ts\": 0.000744,\n    \"samples_ns\": [ 34560810447, 34548341541, 34549297361 ],\n    \"samples_ts\": [ 3.70362, 3.70495, 3.70485 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:20:58Z",
+          "avg_ns": 101149270792,
+          "stddev_ns": 2177306,
+          "avg_ts": 5.061826,
+          "stddev_ts": 0.000107,
+          "samples_ns": [
+            101151730635,
+            101148056593,
+            101148025150
+          ],
+          "samples_ts": [
+            5.0617,
+            5.06189,
+            5.06189
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:27:43Z",
+          "avg_ns": 34552816449,
+          "stddev_ns": 6944458,
+          "avg_ts": 3.704474,
+          "stddev_ts": 0.000744,
+          "samples_ns": [
+            34560810447,
+            34548341541,
+            34549297361
+          ],
+          "samples_ts": [
+            3.70362,
+            3.70495,
+            3.70485
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1218
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:43:12.363013+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:29:28Z\",\n    \"avg_ns\": 101158899817,\n    \"stddev_ns\": 938942,\n    \"avg_ts\": 5.061344,\n    \"stddev_ts\": 0.000041,\n    \"samples_ns\": [ 101158493327, 101158357832, 101159848294 ],\n    \"samples_ts\": [ 5.06136, 5.06137, 5.0613 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:36:13Z\",\n    \"avg_ns\": 139621420933,\n    \"stddev_ns\": 1764713,\n    \"avg_ts\": 3.667059,\n    \"stddev_ts\": 0.000045,\n    \"samples_ns\": [ 139619787693, 139621250646, 139623224461 ],\n    \"samples_ts\": [ 3.6671, 3.66706, 3.66701 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:29:28Z",
+          "avg_ns": 101158899817,
+          "stddev_ns": 938942,
+          "avg_ts": 5.061344,
+          "stddev_ts": 4.1e-05,
+          "samples_ns": [
+            101158493327,
+            101158357832,
+            101159848294
+          ],
+          "samples_ts": [
+            5.06136,
+            5.06137,
+            5.0613
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:36:13Z",
+          "avg_ns": 139621420933,
+          "stddev_ns": 1764713,
+          "avg_ts": 3.667059,
+          "stddev_ts": 4.5e-05,
+          "samples_ns": [
+            139619787693,
+            139621250646,
+            139623224461
+          ],
+          "samples_ts": [
+            3.6671,
+            3.66706,
+            3.66701
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1219
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:46:38.181082+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:43:13Z\",\n    \"avg_ns\": 25178992719,\n    \"stddev_ns\": 2374887,\n    \"avg_ts\": 5.083603,\n    \"stddev_ts\": 0.000478,\n    \"samples_ns\": [ 25181696554, 25178003944, 25177277660 ],\n    \"samples_ts\": [ 5.08306, 5.0838, 5.08395 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:44:54Z\",\n    \"avg_ns\": 34523260212,\n    \"stddev_ns\": 1092763,\n    \"avg_ts\": 3.707645,\n    \"stddev_ts\": 0.000116,\n    \"samples_ns\": [ 34524250390, 34523416483, 34522113764 ],\n    \"samples_ts\": [ 3.70754, 3.70763, 3.70777 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:43:13Z",
+          "avg_ns": 25178992719,
+          "stddev_ns": 2374887,
+          "avg_ts": 5.083603,
+          "stddev_ts": 0.000478,
+          "samples_ns": [
+            25181696554,
+            25178003944,
+            25177277660
+          ],
+          "samples_ts": [
+            5.08306,
+            5.0838,
+            5.08395
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:44:54Z",
+          "avg_ns": 34523260212,
+          "stddev_ns": 1092763,
+          "avg_ts": 3.707645,
+          "stddev_ts": 0.000116,
+          "samples_ns": [
+            34524250390,
+            34523416483,
+            34522113764
+          ],
+          "samples_ts": [
+            3.70754,
+            3.70763,
+            3.70777
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1220
+    },
+    {
+      "timestamp_utc": "2025-12-11T17:55:20.859451+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:46:39Z\",\n    \"avg_ns\": 25179087221,\n    \"stddev_ns\": 3430894,\n    \"avg_ts\": 5.083584,\n    \"stddev_ts\": 0.000693,\n    \"samples_ns\": [ 25183048871, 25177112414, 25177100378 ],\n    \"samples_ts\": [ 5.08278, 5.08398, 5.08398 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:48:20Z\",\n    \"avg_ns\": 140119398630,\n    \"stddev_ns\": 2428364,\n    \"avg_ts\": 3.654027,\n    \"stddev_ts\": 0.000063,\n    \"samples_ns\": [ 140116602045, 140120973813, 140120620032 ],\n    \"samples_ts\": [ 3.6541, 3.65399, 3.65399 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:46:39Z",
+          "avg_ns": 25179087221,
+          "stddev_ns": 3430894,
+          "avg_ts": 5.083584,
+          "stddev_ts": 0.000693,
+          "samples_ns": [
+            25183048871,
+            25177112414,
+            25177100378
+          ],
+          "samples_ts": [
+            5.08278,
+            5.08398,
+            5.08398
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:48:20Z",
+          "avg_ns": 140119398630,
+          "stddev_ns": 2428364,
+          "avg_ts": 3.654027,
+          "stddev_ts": 6.3e-05,
+          "samples_ns": [
+            140116602045,
+            140120973813,
+            140120620032
+          ],
+          "samples_ts": [
+            3.6541,
+            3.65399,
+            3.65399
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1221
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:03:53.452263+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T17:55:22Z\",\n    \"avg_ns\": 101821864809,\n    \"stddev_ns\": 2314421,\n    \"avg_ts\": 5.028390,\n    \"stddev_ts\": 0.000112,\n    \"samples_ns\": [ 101824450548, 101820199958, 101820943923 ],\n    \"samples_ts\": [ 5.02826, 5.02847, 5.02844 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:02:09Z\",\n    \"avg_ns\": 34581446638,\n    \"stddev_ns\": 3995269,\n    \"avg_ts\": 3.701407,\n    \"stddev_ts\": 0.000427,\n    \"samples_ns\": [ 34585640708, 34577695841, 34581003366 ],\n    \"samples_ts\": [ 3.70096, 3.70181, 3.70145 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T17:55:22Z",
+          "avg_ns": 101821864809,
+          "stddev_ns": 2314421,
+          "avg_ts": 5.02839,
+          "stddev_ts": 0.000112,
+          "samples_ns": [
+            101824450548,
+            101820199958,
+            101820943923
+          ],
+          "samples_ts": [
+            5.02826,
+            5.02847,
+            5.02844
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:02:09Z",
+          "avg_ns": 34581446638,
+          "stddev_ns": 3995269,
+          "avg_ts": 3.701407,
+          "stddev_ts": 0.000427,
+          "samples_ns": [
+            34585640708,
+            34577695841,
+            34581003366
+          ],
+          "samples_ts": [
+            3.70096,
+            3.70181,
+            3.70145
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1222
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:17:40.643379+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:03:54Z\",\n    \"avg_ns\": 101823034398,\n    \"stddev_ns\": 2590808,\n    \"avg_ts\": 5.028332,\n    \"stddev_ts\": 0.000127,\n    \"samples_ns\": [ 101825868007, 101822384708, 101820850480 ],\n    \"samples_ts\": [ 5.02819, 5.02836, 5.02844 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:10:42Z\",\n    \"avg_ns\": 139425056844,\n    \"stddev_ns\": 7676280,\n    \"avg_ts\": 3.672224,\n    \"stddev_ts\": 0.000202,\n    \"samples_ns\": [ 139433783901, 139421983558, 139419403074 ],\n    \"samples_ts\": [ 3.67199, 3.6723, 3.67237 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:03:54Z",
+          "avg_ns": 101823034398,
+          "stddev_ns": 2590808,
+          "avg_ts": 5.028332,
+          "stddev_ts": 0.000127,
+          "samples_ns": [
+            101825868007,
+            101822384708,
+            101820850480
+          ],
+          "samples_ts": [
+            5.02819,
+            5.02836,
+            5.02844
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:10:42Z",
+          "avg_ns": 139425056844,
+          "stddev_ns": 7676280,
+          "avg_ts": 3.672224,
+          "stddev_ts": 0.000202,
+          "samples_ns": [
+            139433783901,
+            139421983558,
+            139419403074
+          ],
+          "samples_ts": [
+            3.67199,
+            3.6723,
+            3.67237
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1223
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:20:06.022423+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:17:41Z\",\n    \"avg_ns\": 17021583351,\n    \"stddev_ns\": 6799910,\n    \"avg_ts\": 7.519865,\n    \"stddev_ts\": 0.003003,\n    \"samples_ns\": [ 17022488940, 17027882570, 17014378545 ],\n    \"samples_ts\": [ 7.51946, 7.51708, 7.52305 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:18:49Z\",\n    \"avg_ns\": 25270001580,\n    \"stddev_ns\": 5932131,\n    \"avg_ts\": 5.065295,\n    \"stddev_ts\": 0.001189,\n    \"samples_ns\": [ 25273947269, 25263182235, 25272875237 ],\n    \"samples_ts\": [ 5.0645, 5.06666, 5.06472 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:17:41Z",
+          "avg_ns": 17021583351,
+          "stddev_ns": 6799910,
+          "avg_ts": 7.519865,
+          "stddev_ts": 0.003003,
+          "samples_ns": [
+            17022488940,
+            17027882570,
+            17014378545
+          ],
+          "samples_ts": [
+            7.51946,
+            7.51708,
+            7.52305
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:18:49Z",
+          "avg_ns": 25270001580,
+          "stddev_ns": 5932131,
+          "avg_ts": 5.065295,
+          "stddev_ts": 0.001189,
+          "samples_ns": [
+            25273947269,
+            25263182235,
+            25272875237
+          ],
+          "samples_ts": [
+            5.0645,
+            5.06666,
+            5.06472
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1224
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:26:22.708918+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:20:07Z\",\n    \"avg_ns\": 17023055440,\n    \"stddev_ns\": 910873,\n    \"avg_ts\": 7.519214,\n    \"stddev_ts\": 0.000394,\n    \"samples_ns\": [ 17023459754, 17022032892, 17023673676 ],\n    \"samples_ts\": [ 7.51904, 7.51967, 7.51894 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:21:15Z\",\n    \"avg_ns\": 102364016378,\n    \"stddev_ns\": 74856803,\n    \"avg_ts\": 5.001759,\n    \"stddev_ts\": 0.003657,\n    \"samples_ns\": [ 102341263741, 102303177225, 102447608170 ],\n    \"samples_ts\": [ 5.00287, 5.00473, 4.99768 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:20:07Z",
+          "avg_ns": 17023055440,
+          "stddev_ns": 910873,
+          "avg_ts": 7.519214,
+          "stddev_ts": 0.000394,
+          "samples_ns": [
+            17023459754,
+            17022032892,
+            17023673676
+          ],
+          "samples_ts": [
+            7.51904,
+            7.51967,
+            7.51894
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:21:15Z",
+          "avg_ns": 102364016378,
+          "stddev_ns": 74856803,
+          "avg_ts": 5.001759,
+          "stddev_ts": 0.003657,
+          "samples_ns": [
+            102341263741,
+            102303177225,
+            102447608170
+          ],
+          "samples_ts": [
+            5.00287,
+            5.00473,
+            4.99768
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1225
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:32:12.513508+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:26:23Z\",\n    \"avg_ns\": 68025168952,\n    \"stddev_ns\": 215234788,\n    \"avg_ts\": 7.526676,\n    \"stddev_ts\": 0.023806,\n    \"samples_ns\": [ 68248337412, 68008309720, 67818859725 ],\n    \"samples_ts\": [ 7.50201, 7.52849, 7.54952 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:30:56Z\",\n    \"avg_ns\": 25330850614,\n    \"stddev_ns\": 6767686,\n    \"avg_ts\": 5.053127,\n    \"stddev_ts\": 0.001350,\n    \"samples_ns\": [ 25323168006, 25335930760, 25333453076 ],\n    \"samples_ts\": [ 5.05466, 5.05211, 5.05261 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:26:23Z",
+          "avg_ns": 68025168952,
+          "stddev_ns": 215234788,
+          "avg_ts": 7.526676,
+          "stddev_ts": 0.023806,
+          "samples_ns": [
+            68248337412,
+            68008309720,
+            67818859725
+          ],
+          "samples_ts": [
+            7.50201,
+            7.52849,
+            7.54952
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:30:56Z",
+          "avg_ns": 25330850614,
+          "stddev_ns": 6767686,
+          "avg_ts": 5.053127,
+          "stddev_ts": 0.00135,
+          "samples_ns": [
+            25323168006,
+            25335930760,
+            25333453076
+          ],
+          "samples_ts": [
+            5.05466,
+            5.05211,
+            5.05261
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1226
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:41:54.362848+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:32:13Z\",\n    \"avg_ns\": 68273173357,\n    \"stddev_ns\": 8990819,\n    \"avg_ts\": 7.499285,\n    \"stddev_ts\": 0.000987,\n    \"samples_ns\": [ 68273368385, 68264094209, 68282057479 ],\n    \"samples_ts\": [ 7.49926, 7.50028, 7.49831 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:36:46Z\",\n    \"avg_ns\": 102418810277,\n    \"stddev_ns\": 121004656,\n    \"avg_ts\": 4.999086,\n    \"stddev_ts\": 0.005904,\n    \"samples_ns\": [ 102313336934, 102392184160, 102550909739 ],\n    \"samples_ts\": [ 5.00424, 5.00038, 4.99264 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:32:13Z",
+          "avg_ns": 68273173357,
+          "stddev_ns": 8990819,
+          "avg_ts": 7.499285,
+          "stddev_ts": 0.000987,
+          "samples_ns": [
+            68273368385,
+            68264094209,
+            68282057479
+          ],
+          "samples_ts": [
+            7.49926,
+            7.50028,
+            7.49831
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:36:46Z",
+          "avg_ns": 102418810277,
+          "stddev_ns": 121004656,
+          "avg_ts": 4.999086,
+          "stddev_ts": 0.005904,
+          "samples_ns": [
+            102313336934,
+            102392184160,
+            102550909739
+          ],
+          "samples_ts": [
+            5.00424,
+            5.00038,
+            4.99264
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1227
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:44:19.714020+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:41:55Z\",\n    \"avg_ns\": 17014632912,\n    \"stddev_ns\": 861782,\n    \"avg_ts\": 7.522936,\n    \"stddev_ts\": 0.000372,\n    \"samples_ns\": [ 17014731693, 17013746073, 17015420972 ],\n    \"samples_ts\": [ 7.52289, 7.52333, 7.52259 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:43:03Z\",\n    \"avg_ns\": 25274261994,\n    \"stddev_ns\": 8503140,\n    \"avg_ts\": 5.064441,\n    \"stddev_ts\": 0.001704,\n    \"samples_ns\": [ 25265830252, 25282834778, 25274120952 ],\n    \"samples_ts\": [ 5.06613, 5.06272, 5.06447 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:41:55Z",
+          "avg_ns": 17014632912,
+          "stddev_ns": 861782,
+          "avg_ts": 7.522936,
+          "stddev_ts": 0.000372,
+          "samples_ns": [
+            17014731693,
+            17013746073,
+            17015420972
+          ],
+          "samples_ts": [
+            7.52289,
+            7.52333,
+            7.52259
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:43:03Z",
+          "avg_ns": 25274261994,
+          "stddev_ns": 8503140,
+          "avg_ts": 5.064441,
+          "stddev_ts": 0.001704,
+          "samples_ns": [
+            25265830252,
+            25282834778,
+            25274120952
+          ],
+          "samples_ts": [
+            5.06613,
+            5.06272,
+            5.06447
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1228
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:50:36.219195+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:44:20Z\",\n    \"avg_ns\": 17023846562,\n    \"stddev_ns\": 5499531,\n    \"avg_ts\": 7.518865,\n    \"stddev_ts\": 0.002429,\n    \"samples_ns\": [ 17029100833, 17024308006, 17018130847 ],\n    \"samples_ts\": [ 7.51654, 7.51866, 7.52139 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:45:28Z\",\n    \"avg_ns\": 102310158659,\n    \"stddev_ns\": 16800118,\n    \"avg_ts\": 5.004391,\n    \"stddev_ts\": 0.000821,\n    \"samples_ns\": [ 102309401749, 102293755885, 102327318345 ],\n    \"samples_ts\": [ 5.00443, 5.00519, 5.00355 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:44:20Z",
+          "avg_ns": 17023846562,
+          "stddev_ns": 5499531,
+          "avg_ts": 7.518865,
+          "stddev_ts": 0.002429,
+          "samples_ns": [
+            17029100833,
+            17024308006,
+            17018130847
+          ],
+          "samples_ts": [
+            7.51654,
+            7.51866,
+            7.52139
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:45:28Z",
+          "avg_ns": 102310158659,
+          "stddev_ns": 16800118,
+          "avg_ts": 5.004391,
+          "stddev_ts": 0.000821,
+          "samples_ns": [
+            102309401749,
+            102293755885,
+            102327318345
+          ],
+          "samples_ts": [
+            5.00443,
+            5.00519,
+            5.00355
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1229
+    },
+    {
+      "timestamp_utc": "2025-12-11T18:56:27.535961+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:50:37Z\",\n    \"avg_ns\": 68468964978,\n    \"stddev_ns\": 7194969,\n    \"avg_ts\": 7.477841,\n    \"stddev_ts\": 0.000786,\n    \"samples_ns\": [ 68462537867, 68476737784, 68467619283 ],\n    \"samples_ts\": [ 7.47854, 7.47699, 7.47799 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:55:11Z\",\n    \"avg_ns\": 25298953259,\n    \"stddev_ns\": 15991690,\n    \"avg_ts\": 5.059499,\n    \"stddev_ts\": 0.003198,\n    \"samples_ns\": [ 25282528816, 25299860564, 25314470399 ],\n    \"samples_ts\": [ 5.06278, 5.05932, 5.0564 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:50:37Z",
+          "avg_ns": 68468964978,
+          "stddev_ns": 7194969,
+          "avg_ts": 7.477841,
+          "stddev_ts": 0.000786,
+          "samples_ns": [
+            68462537867,
+            68476737784,
+            68467619283
+          ],
+          "samples_ts": [
+            7.47854,
+            7.47699,
+            7.47799
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:55:11Z",
+          "avg_ns": 25298953259,
+          "stddev_ns": 15991690,
+          "avg_ts": 5.059499,
+          "stddev_ts": 0.003198,
+          "samples_ns": [
+            25282528816,
+            25299860564,
+            25314470399
+          ],
+          "samples_ts": [
+            5.06278,
+            5.05932,
+            5.0564
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1230
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:06:09.870462+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T18:56:28Z\",\n    \"avg_ns\": 68372546845,\n    \"stddev_ns\": 109459779,\n    \"avg_ts\": 7.488398,\n    \"stddev_ts\": 0.011998,\n    \"samples_ns\": [ 68454213557, 68415255611, 68248171368 ],\n    \"samples_ts\": [ 7.47945, 7.48371, 7.50203 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:01:02Z\",\n    \"avg_ns\": 102416908191,\n    \"stddev_ns\": 3875645,\n    \"avg_ts\": 4.999175,\n    \"stddev_ts\": 0.000188,\n    \"samples_ns\": [ 102420450576, 102417461743, 102412812256 ],\n    \"samples_ts\": [ 4.999, 4.99915, 4.99937 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T18:56:28Z",
+          "avg_ns": 68372546845,
+          "stddev_ns": 109459779,
+          "avg_ts": 7.488398,
+          "stddev_ts": 0.011998,
+          "samples_ns": [
+            68454213557,
+            68415255611,
+            68248171368
+          ],
+          "samples_ts": [
+            7.47945,
+            7.48371,
+            7.50203
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:01:02Z",
+          "avg_ns": 102416908191,
+          "stddev_ns": 3875645,
+          "avg_ts": 4.999175,
+          "stddev_ts": 0.000188,
+          "samples_ns": [
+            102420450576,
+            102417461743,
+            102412812256
+          ],
+          "samples_ts": [
+            4.999,
+            4.99915,
+            4.99937
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1231
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:08:35.182195+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:06:11Z\",\n    \"avg_ns\": 17019118292,\n    \"stddev_ns\": 3789735,\n    \"avg_ts\": 7.520954,\n    \"stddev_ts\": 0.001674,\n    \"samples_ns\": [ 17021869150, 17020687292, 17014798435 ],\n    \"samples_ts\": [ 7.51974, 7.52026, 7.52286 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:07:19Z\",\n    \"avg_ns\": 25252614908,\n    \"stddev_ns\": 16432822,\n    \"avg_ts\": 5.068783,\n    \"stddev_ts\": 0.003298,\n    \"samples_ns\": [ 25249143864, 25238194888, 25270505972 ],\n    \"samples_ts\": [ 5.06948, 5.07168, 5.06519 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:06:11Z",
+          "avg_ns": 17019118292,
+          "stddev_ns": 3789735,
+          "avg_ts": 7.520954,
+          "stddev_ts": 0.001674,
+          "samples_ns": [
+            17021869150,
+            17020687292,
+            17014798435
+          ],
+          "samples_ts": [
+            7.51974,
+            7.52026,
+            7.52286
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:07:19Z",
+          "avg_ns": 25252614908,
+          "stddev_ns": 16432822,
+          "avg_ts": 5.068783,
+          "stddev_ts": 0.003298,
+          "samples_ns": [
+            25249143864,
+            25238194888,
+            25270505972
+          ],
+          "samples_ts": [
+            5.06948,
+            5.07168,
+            5.06519
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1232
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:14:51.898024+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:08:36Z\",\n    \"avg_ns\": 17018224421,\n    \"stddev_ns\": 2083007,\n    \"avg_ts\": 7.521349,\n    \"stddev_ts\": 0.000919,\n    \"samples_ns\": [ 17020165489, 17016030738, 17018477037 ],\n    \"samples_ts\": [ 7.52049, 7.52232, 7.52124 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:09:44Z\",\n    \"avg_ns\": 102369023198,\n    \"stddev_ns\": 74246871,\n    \"avg_ts\": 5.001515,\n    \"stddev_ts\": 0.003628,\n    \"samples_ns\": [ 102372855310, 102292935859, 102441278427 ],\n    \"samples_ts\": [ 5.00133, 5.00523, 4.99799 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:08:36Z",
+          "avg_ns": 17018224421,
+          "stddev_ns": 2083007,
+          "avg_ts": 7.521349,
+          "stddev_ts": 0.000919,
+          "samples_ns": [
+            17020165489,
+            17016030738,
+            17018477037
+          ],
+          "samples_ts": [
+            7.52049,
+            7.52232,
+            7.52124
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:09:44Z",
+          "avg_ns": 102369023198,
+          "stddev_ns": 74246871,
+          "avg_ts": 5.001515,
+          "stddev_ts": 0.003628,
+          "samples_ns": [
+            102372855310,
+            102292935859,
+            102441278427
+          ],
+          "samples_ts": [
+            5.00133,
+            5.00523,
+            4.99799
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1233
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:20:45.119827+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:14:53Z\",\n    \"avg_ns\": 68946141188,\n    \"stddev_ns\": 36273878,\n    \"avg_ts\": 7.426088,\n    \"stddev_ts\": 0.003907,\n    \"samples_ns\": [ 68907598854, 68951211628, 68979613082 ],\n    \"samples_ts\": [ 7.43024, 7.42554, 7.42248 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:19:28Z\",\n    \"avg_ns\": 25299475585,\n    \"stddev_ns\": 7941565,\n    \"avg_ts\": 5.059394,\n    \"stddev_ts\": 0.001588,\n    \"samples_ns\": [ 25302775507, 25305231754, 25290419496 ],\n    \"samples_ts\": [ 5.05873, 5.05824, 5.06121 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:14:53Z",
+          "avg_ns": 68946141188,
+          "stddev_ns": 36273878,
+          "avg_ts": 7.426088,
+          "stddev_ts": 0.003907,
+          "samples_ns": [
+            68907598854,
+            68951211628,
+            68979613082
+          ],
+          "samples_ts": [
+            7.43024,
+            7.42554,
+            7.42248
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:19:28Z",
+          "avg_ns": 25299475585,
+          "stddev_ns": 7941565,
+          "avg_ts": 5.059394,
+          "stddev_ts": 0.001588,
+          "samples_ns": [
+            25302775507,
+            25305231754,
+            25290419496
+          ],
+          "samples_ts": [
+            5.05873,
+            5.05824,
+            5.06121
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1234
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:30:29.437722+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:20:46Z\",\n    \"avg_ns\": 68970977639,\n    \"stddev_ns\": 12955087,\n    \"avg_ts\": 7.423412,\n    \"stddev_ts\": 0.001394,\n    \"samples_ns\": [ 68981193639, 68975325927, 68956413353 ],\n    \"samples_ts\": [ 7.42231, 7.42294, 7.42498 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:25:22Z\",\n    \"avg_ns\": 102305931932,\n    \"stddev_ns\": 8968079,\n    \"avg_ts\": 5.004597,\n    \"stddev_ts\": 0.000438,\n    \"samples_ns\": [ 102296847881, 102314767460, 102306180456 ],\n    \"samples_ts\": [ 5.00504, 5.00417, 5.00459 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:20:46Z",
+          "avg_ns": 68970977639,
+          "stddev_ns": 12955087,
+          "avg_ts": 7.423412,
+          "stddev_ts": 0.001394,
+          "samples_ns": [
+            68981193639,
+            68975325927,
+            68956413353
+          ],
+          "samples_ts": [
+            7.42231,
+            7.42294,
+            7.42498
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:25:22Z",
+          "avg_ns": 102305931932,
+          "stddev_ns": 8968079,
+          "avg_ts": 5.004597,
+          "stddev_ts": 0.000438,
+          "samples_ns": [
+            102296847881,
+            102314767460,
+            102306180456
+          ],
+          "samples_ts": [
+            5.00504,
+            5.00417,
+            5.00459
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1235
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:32:54.905355+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:30:30Z\",\n    \"avg_ns\": 17016941907,\n    \"stddev_ns\": 1231143,\n    \"avg_ts\": 7.521916,\n    \"stddev_ts\": 0.000544,\n    \"samples_ns\": [ 17017937817, 17017322494, 17015565410 ],\n    \"samples_ts\": [ 7.52148, 7.52175, 7.52252 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:31:38Z\",\n    \"avg_ns\": 25307544054,\n    \"stddev_ns\": 4335414,\n    \"avg_ts\": 5.057780,\n    \"stddev_ts\": 0.000865,\n    \"samples_ns\": [ 25305012533, 25312543279, 25305076352 ],\n    \"samples_ts\": [ 5.05829, 5.05678, 5.05827 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:30:30Z",
+          "avg_ns": 17016941907,
+          "stddev_ns": 1231143,
+          "avg_ts": 7.521916,
+          "stddev_ts": 0.000544,
+          "samples_ns": [
+            17017937817,
+            17017322494,
+            17015565410
+          ],
+          "samples_ts": [
+            7.52148,
+            7.52175,
+            7.52252
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:31:38Z",
+          "avg_ns": 25307544054,
+          "stddev_ns": 4335414,
+          "avg_ts": 5.05778,
+          "stddev_ts": 0.000865,
+          "samples_ns": [
+            25305012533,
+            25312543279,
+            25305076352
+          ],
+          "samples_ts": [
+            5.05829,
+            5.05678,
+            5.05827
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1236
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:39:11.494517+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:32:56Z\",\n    \"avg_ns\": 17012880085,\n    \"stddev_ns\": 1141845,\n    \"avg_ts\": 7.523711,\n    \"stddev_ts\": 0.000502,\n    \"samples_ns\": [ 17011920004, 17012588452, 17014131800 ],\n    \"samples_ts\": [ 7.52414, 7.52384, 7.52316 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:34:04Z\",\n    \"avg_ns\": 102343334459,\n    \"stddev_ns\": 33919472,\n    \"avg_ts\": 5.002769,\n    \"stddev_ts\": 0.001658,\n    \"samples_ns\": [ 102324274228, 102323235931, 102382493220 ],\n    \"samples_ts\": [ 5.0037, 5.00375, 5.00085 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:32:56Z",
+          "avg_ns": 17012880085,
+          "stddev_ns": 1141845,
+          "avg_ts": 7.523711,
+          "stddev_ts": 0.000502,
+          "samples_ns": [
+            17011920004,
+            17012588452,
+            17014131800
+          ],
+          "samples_ts": [
+            7.52414,
+            7.52384,
+            7.52316
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:34:04Z",
+          "avg_ns": 102343334459,
+          "stddev_ns": 33919472,
+          "avg_ts": 5.002769,
+          "stddev_ts": 0.001658,
+          "samples_ns": [
+            102324274228,
+            102323235931,
+            102382493220
+          ],
+          "samples_ts": [
+            5.0037,
+            5.00375,
+            5.00085
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1237
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:45:01.604979+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:39:12Z\",\n    \"avg_ns\": 68237887523,\n    \"stddev_ns\": 2667391,\n    \"avg_ts\": 7.503163,\n    \"stddev_ts\": 0.000293,\n    \"samples_ns\": [ 68237993534, 68235168706, 68240500329 ],\n    \"samples_ts\": [ 7.50315, 7.50346, 7.50288 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:43:45Z\",\n    \"avg_ns\": 25242865232,\n    \"stddev_ns\": 441468,\n    \"avg_ts\": 5.070740,\n    \"stddev_ts\": 0.000089,\n    \"samples_ns\": [ 25242391710, 25242938508, 25243265478 ],\n    \"samples_ts\": [ 5.07083, 5.07073, 5.07066 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:39:12Z",
+          "avg_ns": 68237887523,
+          "stddev_ns": 2667391,
+          "avg_ts": 7.503163,
+          "stddev_ts": 0.000293,
+          "samples_ns": [
+            68237993534,
+            68235168706,
+            68240500329
+          ],
+          "samples_ts": [
+            7.50315,
+            7.50346,
+            7.50288
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:43:45Z",
+          "avg_ns": 25242865232,
+          "stddev_ns": 441468,
+          "avg_ts": 5.07074,
+          "stddev_ts": 8.9e-05,
+          "samples_ns": [
+            25242391710,
+            25242938508,
+            25243265478
+          ],
+          "samples_ts": [
+            5.07083,
+            5.07073,
+            5.07066
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1238
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:54:43.124058+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:45:02Z\",\n    \"avg_ns\": 68244325720,\n    \"stddev_ns\": 5055763,\n    \"avg_ts\": 7.502455,\n    \"stddev_ts\": 0.000555,\n    \"samples_ns\": [ 68244103413, 68249482212, 68239391536 ],\n    \"samples_ts\": [ 7.50248, 7.50189, 7.503 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:49:35Z\",\n    \"avg_ns\": 102361774245,\n    \"stddev_ns\": 133555304,\n    \"avg_ts\": 5.001873,\n    \"stddev_ts\": 0.006531,\n    \"samples_ns\": [ 102449970615, 102427233649, 102208118473 ],\n    \"samples_ts\": [ 4.99756, 4.99867, 5.00939 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:45:02Z",
+          "avg_ns": 68244325720,
+          "stddev_ns": 5055763,
+          "avg_ts": 7.502455,
+          "stddev_ts": 0.000555,
+          "samples_ns": [
+            68244103413,
+            68249482212,
+            68239391536
+          ],
+          "samples_ts": [
+            7.50248,
+            7.50189,
+            7.503
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:49:35Z",
+          "avg_ns": 102361774245,
+          "stddev_ns": 133555304,
+          "avg_ts": 5.001873,
+          "stddev_ts": 0.006531,
+          "samples_ns": [
+            102449970615,
+            102427233649,
+            102208118473
+          ],
+          "samples_ts": [
+            4.99756,
+            4.99867,
+            5.00939
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1239
+    },
+    {
+      "timestamp_utc": "2025-12-11T19:57:08.653358+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:54:44Z\",\n    \"avg_ns\": 17018637938,\n    \"stddev_ns\": 1777279,\n    \"avg_ts\": 7.521166,\n    \"stddev_ts\": 0.000781,\n    \"samples_ns\": [ 17019817700, 17019490558, 17016605558 ],\n    \"samples_ts\": [ 7.52064, 7.52079, 7.52206 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:55:52Z\",\n    \"avg_ns\": 25334991416,\n    \"stddev_ns\": 850032,\n    \"avg_ts\": 5.052301,\n    \"stddev_ts\": 0.000167,\n    \"samples_ns\": [ 25334738921, 25335923521, 25334311807 ],\n    \"samples_ts\": [ 5.05235, 5.05212, 5.05244 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:54:44Z",
+          "avg_ns": 17018637938,
+          "stddev_ns": 1777279,
+          "avg_ts": 7.521166,
+          "stddev_ts": 0.000781,
+          "samples_ns": [
+            17019817700,
+            17019490558,
+            17016605558
+          ],
+          "samples_ts": [
+            7.52064,
+            7.52079,
+            7.52206
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:55:52Z",
+          "avg_ns": 25334991416,
+          "stddev_ns": 850032,
+          "avg_ts": 5.052301,
+          "stddev_ts": 0.000167,
+          "samples_ns": [
+            25334738921,
+            25335923521,
+            25334311807
+          ],
+          "samples_ts": [
+            5.05235,
+            5.05212,
+            5.05244
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1240
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:03:24.760453+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:57:09Z\",\n    \"avg_ns\": 17018013322,\n    \"stddev_ns\": 4808133,\n    \"avg_ts\": 7.521442,\n    \"stddev_ts\": 0.002124,\n    \"samples_ns\": [ 17021285237, 17020257449, 17012497282 ],\n    \"samples_ts\": [ 7.52, 7.52045, 7.52388 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T19:58:17Z\",\n    \"avg_ns\": 102186183963,\n    \"stddev_ns\": 15254639,\n    \"avg_ts\": 5.010462,\n    \"stddev_ts\": 0.000748,\n    \"samples_ns\": [ 102191295054, 102198223260, 102169033576 ],\n    \"samples_ts\": [ 5.01021, 5.00987, 5.0113 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:57:09Z",
+          "avg_ns": 17018013322,
+          "stddev_ns": 4808133,
+          "avg_ts": 7.521442,
+          "stddev_ts": 0.002124,
+          "samples_ns": [
+            17021285237,
+            17020257449,
+            17012497282
+          ],
+          "samples_ts": [
+            7.52,
+            7.52045,
+            7.52388
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T19:58:17Z",
+          "avg_ns": 102186183963,
+          "stddev_ns": 15254639,
+          "avg_ts": 5.010462,
+          "stddev_ts": 0.000748,
+          "samples_ns": [
+            102191295054,
+            102198223260,
+            102169033576
+          ],
+          "samples_ts": [
+            5.01021,
+            5.00987,
+            5.0113
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1241
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:09:15.791302+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:03:25Z\",\n    \"avg_ns\": 68449616852,\n    \"stddev_ns\": 3605558,\n    \"avg_ts\": 7.479954,\n    \"stddev_ts\": 0.000393,\n    \"samples_ns\": [ 68446267571, 68453417131, 68449165855 ],\n    \"samples_ts\": [ 7.48032, 7.47954, 7.48 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:07:59Z\",\n    \"avg_ns\": 25253284882,\n    \"stddev_ns\": 5062155,\n    \"avg_ts\": 5.068648,\n    \"stddev_ts\": 0.001016,\n    \"samples_ns\": [ 25258996339, 25249352184, 25251506123 ],\n    \"samples_ts\": [ 5.0675, 5.06944, 5.069 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:03:25Z",
+          "avg_ns": 68449616852,
+          "stddev_ns": 3605558,
+          "avg_ts": 7.479954,
+          "stddev_ts": 0.000393,
+          "samples_ns": [
+            68446267571,
+            68453417131,
+            68449165855
+          ],
+          "samples_ts": [
+            7.48032,
+            7.47954,
+            7.48
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:07:59Z",
+          "avg_ns": 25253284882,
+          "stddev_ns": 5062155,
+          "avg_ts": 5.068648,
+          "stddev_ts": 0.001016,
+          "samples_ns": [
+            25258996339,
+            25249352184,
+            25251506123
+          ],
+          "samples_ts": [
+            5.0675,
+            5.06944,
+            5.069
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1242
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:18:57.753963+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:09:16Z\",\n    \"avg_ns\": 68409195820,\n    \"stddev_ns\": 12606664,\n    \"avg_ts\": 7.484374,\n    \"stddev_ts\": 0.001379,\n    \"samples_ns\": [ 68396815308, 68422006753, 68408765401 ],\n    \"samples_ts\": [ 7.48573, 7.48297, 7.48442 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:13:50Z\",\n    \"avg_ns\": 102281048667,\n    \"stddev_ns\": 35746498,\n    \"avg_ts\": 5.005815,\n    \"stddev_ts\": 0.001749,\n    \"samples_ns\": [ 102265944390, 102255333588, 102321868023 ],\n    \"samples_ts\": [ 5.00655, 5.00707, 5.00382 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:09:16Z",
+          "avg_ns": 68409195820,
+          "stddev_ns": 12606664,
+          "avg_ts": 7.484374,
+          "stddev_ts": 0.001379,
+          "samples_ns": [
+            68396815308,
+            68422006753,
+            68408765401
+          ],
+          "samples_ts": [
+            7.48573,
+            7.48297,
+            7.48442
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:13:50Z",
+          "avg_ns": 102281048667,
+          "stddev_ns": 35746498,
+          "avg_ts": 5.005815,
+          "stddev_ts": 0.001749,
+          "samples_ns": [
+            102265944390,
+            102255333588,
+            102321868023
+          ],
+          "samples_ts": [
+            5.00655,
+            5.00707,
+            5.00382
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1243
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:21:23.223723+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:18:58Z\",\n    \"avg_ns\": 17016903022,\n    \"stddev_ns\": 3920239,\n    \"avg_ts\": 7.521933,\n    \"stddev_ts\": 0.001732,\n    \"samples_ns\": [ 17012408287, 17018703987, 17019596793 ],\n    \"samples_ts\": [ 7.52392, 7.52114, 7.52074 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:20:06Z\",\n    \"avg_ns\": 25322377826,\n    \"stddev_ns\": 7034810,\n    \"avg_ts\": 5.054818,\n    \"stddev_ts\": 0.001404,\n    \"samples_ns\": [ 25330132779, 25320588116, 25316412584 ],\n    \"samples_ts\": [ 5.05327, 5.05517, 5.05601 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:18:58Z",
+          "avg_ns": 17016903022,
+          "stddev_ns": 3920239,
+          "avg_ts": 7.521933,
+          "stddev_ts": 0.001732,
+          "samples_ns": [
+            17012408287,
+            17018703987,
+            17019596793
+          ],
+          "samples_ts": [
+            7.52392,
+            7.52114,
+            7.52074
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:20:06Z",
+          "avg_ns": 25322377826,
+          "stddev_ns": 7034810,
+          "avg_ts": 5.054818,
+          "stddev_ts": 0.001404,
+          "samples_ns": [
+            25330132779,
+            25320588116,
+            25316412584
+          ],
+          "samples_ts": [
+            5.05327,
+            5.05517,
+            5.05601
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1244
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:27:39.435347+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:21:24Z\",\n    \"avg_ns\": 17008839346,\n    \"stddev_ns\": 2252270,\n    \"avg_ts\": 7.525499,\n    \"stddev_ts\": 0.000997,\n    \"samples_ns\": [ 17009876044, 17006255406, 17010386588 ],\n    \"samples_ts\": [ 7.52504, 7.52664, 7.52481 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:22:32Z\",\n    \"avg_ns\": 102242546195,\n    \"stddev_ns\": 29667695,\n    \"avg_ts\": 5.007700,\n    \"stddev_ts\": 0.001453,\n    \"samples_ns\": [ 102275517619, 102234106217, 102218014750 ],\n    \"samples_ts\": [ 5.00609, 5.00811, 5.0089 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:21:24Z",
+          "avg_ns": 17008839346,
+          "stddev_ns": 2252270,
+          "avg_ts": 7.525499,
+          "stddev_ts": 0.000997,
+          "samples_ns": [
+            17009876044,
+            17006255406,
+            17010386588
+          ],
+          "samples_ts": [
+            7.52504,
+            7.52664,
+            7.52481
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:22:32Z",
+          "avg_ns": 102242546195,
+          "stddev_ns": 29667695,
+          "avg_ts": 5.0077,
+          "stddev_ts": 0.001453,
+          "samples_ns": [
+            102275517619,
+            102234106217,
+            102218014750
+          ],
+          "samples_ts": [
+            5.00609,
+            5.00811,
+            5.0089
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1245
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:33:32.559128+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:27:40Z\",\n    \"avg_ns\": 68950806513,\n    \"stddev_ns\": 2107784,\n    \"avg_ts\": 7.425584,\n    \"stddev_ts\": 0.000225,\n    \"samples_ns\": [ 68948489134, 68951376884, 68952553522 ],\n    \"samples_ts\": [ 7.42583, 7.42552, 7.4254 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:32:16Z\",\n    \"avg_ns\": 25286030698,\n    \"stddev_ns\": 8347608,\n    \"avg_ts\": 5.062084,\n    \"stddev_ts\": 0.001671,\n    \"samples_ns\": [ 25282166111, 25295608645, 25280317339 ],\n    \"samples_ts\": [ 5.06286, 5.06017, 5.06323 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:27:40Z",
+          "avg_ns": 68950806513,
+          "stddev_ns": 2107784,
+          "avg_ts": 7.425584,
+          "stddev_ts": 0.000225,
+          "samples_ns": [
+            68948489134,
+            68951376884,
+            68952553522
+          ],
+          "samples_ts": [
+            7.42583,
+            7.42552,
+            7.4254
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:32:16Z",
+          "avg_ns": 25286030698,
+          "stddev_ns": 8347608,
+          "avg_ts": 5.062084,
+          "stddev_ts": 0.001671,
+          "samples_ns": [
+            25282166111,
+            25295608645,
+            25280317339
+          ],
+          "samples_ts": [
+            5.06286,
+            5.06017,
+            5.06323
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1246
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:43:17.476999+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:33:33Z\",\n    \"avg_ns\": 68955164379,\n    \"stddev_ns\": 9831867,\n    \"avg_ts\": 7.425115,\n    \"stddev_ts\": 0.001058,\n    \"samples_ns\": [ 68966044376, 68946941045, 68952507718 ],\n    \"samples_ts\": [ 7.42394, 7.426, 7.4254 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:38:09Z\",\n    \"avg_ns\": 102542610453,\n    \"stddev_ns\": 87047337,\n    \"avg_ts\": 4.993049,\n    \"stddev_ts\": 0.004238,\n    \"samples_ns\": [ 102456028967, 102630116281, 102541686111 ],\n    \"samples_ts\": [ 4.99727, 4.98879, 4.99309 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:33:33Z",
+          "avg_ns": 68955164379,
+          "stddev_ns": 9831867,
+          "avg_ts": 7.425115,
+          "stddev_ts": 0.001058,
+          "samples_ns": [
+            68966044376,
+            68946941045,
+            68952507718
+          ],
+          "samples_ts": [
+            7.42394,
+            7.426,
+            7.4254
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:38:09Z",
+          "avg_ns": 102542610453,
+          "stddev_ns": 87047337,
+          "avg_ts": 4.993049,
+          "stddev_ts": 0.004238,
+          "samples_ns": [
+            102456028967,
+            102630116281,
+            102541686111
+          ],
+          "samples_ts": [
+            4.99727,
+            4.98879,
+            4.99309
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1247
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:45:42.801331+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:43:18Z\",\n    \"avg_ns\": 17013837641,\n    \"stddev_ns\": 1487067,\n    \"avg_ts\": 7.523288,\n    \"stddev_ts\": 0.000655,\n    \"samples_ns\": [ 17015076032, 17014240282, 17012196610 ],\n    \"samples_ts\": [ 7.52274, 7.52311, 7.52401 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:44:26Z\",\n    \"avg_ns\": 25267438729,\n    \"stddev_ns\": 5337704,\n    \"avg_ts\": 5.065808,\n    \"stddev_ts\": 0.001070,\n    \"samples_ns\": [ 25273559029, 25263748073, 25265009085 ],\n    \"samples_ts\": [ 5.06458, 5.06655, 5.0663 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:43:18Z",
+          "avg_ns": 17013837641,
+          "stddev_ns": 1487067,
+          "avg_ts": 7.523288,
+          "stddev_ts": 0.000655,
+          "samples_ns": [
+            17015076032,
+            17014240282,
+            17012196610
+          ],
+          "samples_ts": [
+            7.52274,
+            7.52311,
+            7.52401
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:44:26Z",
+          "avg_ns": 25267438729,
+          "stddev_ns": 5337704,
+          "avg_ts": 5.065808,
+          "stddev_ts": 0.00107,
+          "samples_ns": [
+            25273559029,
+            25263748073,
+            25265009085
+          ],
+          "samples_ts": [
+            5.06458,
+            5.06655,
+            5.0663
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1248
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:51:59.435077+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:45:43Z\",\n    \"avg_ns\": 17010867936,\n    \"stddev_ns\": 2819298,\n    \"avg_ts\": 7.524601,\n    \"stddev_ts\": 0.001245,\n    \"samples_ns\": [ 17012486875, 17012497463, 17007619472 ],\n    \"samples_ts\": [ 7.52389, 7.52388, 7.52604 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:46:52Z\",\n    \"avg_ns\": 102378090309,\n    \"stddev_ns\": 73884248,\n    \"avg_ts\": 5.001072,\n    \"stddev_ts\": 0.003608,\n    \"samples_ns\": [ 102343045085, 102328250811, 102462975032 ],\n    \"samples_ts\": [ 5.00278, 5.00351, 4.99693 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:45:43Z",
+          "avg_ns": 17010867936,
+          "stddev_ns": 2819298,
+          "avg_ts": 7.524601,
+          "stddev_ts": 0.001245,
+          "samples_ns": [
+            17012486875,
+            17012497463,
+            17007619472
+          ],
+          "samples_ts": [
+            7.52389,
+            7.52388,
+            7.52604
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:46:52Z",
+          "avg_ns": 102378090309,
+          "stddev_ns": 73884248,
+          "avg_ts": 5.001072,
+          "stddev_ts": 0.003608,
+          "samples_ns": [
+            102343045085,
+            102328250811,
+            102462975032
+          ],
+          "samples_ts": [
+            5.00278,
+            5.00351,
+            4.99693
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1249
+    },
+    {
+      "timestamp_utc": "2025-12-11T20:57:49.530308+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:52:00Z\",\n    \"avg_ns\": 68228104778,\n    \"stddev_ns\": 2358733,\n    \"avg_ts\": 7.504239,\n    \"stddev_ts\": 0.000258,\n    \"samples_ns\": [ 68229643655, 68229263886, 68225406794 ],\n    \"samples_ts\": [ 7.50407, 7.50411, 7.50454 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:56:33Z\",\n    \"avg_ns\": 25243496831,\n    \"stddev_ns\": 9196486,\n    \"avg_ts\": 5.070613,\n    \"stddev_ts\": 0.001847,\n    \"samples_ns\": [ 25234849702, 25253156170, 25242484622 ],\n    \"samples_ts\": [ 5.07235, 5.06867, 5.07082 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:52:00Z",
+          "avg_ns": 68228104778,
+          "stddev_ns": 2358733,
+          "avg_ts": 7.504239,
+          "stddev_ts": 0.000258,
+          "samples_ns": [
+            68229643655,
+            68229263886,
+            68225406794
+          ],
+          "samples_ts": [
+            7.50407,
+            7.50411,
+            7.50454
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:56:33Z",
+          "avg_ns": 25243496831,
+          "stddev_ns": 9196486,
+          "avg_ts": 5.070613,
+          "stddev_ts": 0.001847,
+          "samples_ns": [
+            25234849702,
+            25253156170,
+            25242484622
+          ],
+          "samples_ts": [
+            5.07235,
+            5.06867,
+            5.07082
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1250
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:07:30.751769+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T20:57:50Z\",\n    \"avg_ns\": 68234666946,\n    \"stddev_ns\": 3568852,\n    \"avg_ts\": 7.503517,\n    \"stddev_ts\": 0.000390,\n    \"samples_ns\": [ 68233726536, 68238592143, 68231682161 ],\n    \"samples_ts\": [ 7.50362, 7.50309, 7.50385 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:02:23Z\",\n    \"avg_ns\": 102276603921,\n    \"stddev_ns\": 4242815,\n    \"avg_ts\": 5.006032,\n    \"stddev_ts\": 0.000206,\n    \"samples_ns\": [ 102280105928, 102271920532, 102277785305 ],\n    \"samples_ts\": [ 5.00586, 5.00626, 5.00597 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T20:57:50Z",
+          "avg_ns": 68234666946,
+          "stddev_ns": 3568852,
+          "avg_ts": 7.503517,
+          "stddev_ts": 0.00039,
+          "samples_ns": [
+            68233726536,
+            68238592143,
+            68231682161
+          ],
+          "samples_ts": [
+            7.50362,
+            7.50309,
+            7.50385
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:02:23Z",
+          "avg_ns": 102276603921,
+          "stddev_ns": 4242815,
+          "avg_ts": 5.006032,
+          "stddev_ts": 0.000206,
+          "samples_ns": [
+            102280105928,
+            102271920532,
+            102277785305
+          ],
+          "samples_ts": [
+            5.00586,
+            5.00626,
+            5.00597
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1251
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:09:56.124514+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:07:31Z\",\n    \"avg_ns\": 17019144942,\n    \"stddev_ns\": 3819055,\n    \"avg_ts\": 7.520942,\n    \"stddev_ts\": 0.001687,\n    \"samples_ns\": [ 17022021015, 17020599019, 17014814793 ],\n    \"samples_ts\": [ 7.51967, 7.5203, 7.52286 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:08:40Z\",\n    \"avg_ns\": 25278019297,\n    \"stddev_ns\": 8495143,\n    \"avg_ts\": 5.063688,\n    \"stddev_ts\": 0.001702,\n    \"samples_ns\": [ 25278573820, 25269261967, 25286222105 ],\n    \"samples_ts\": [ 5.06358, 5.06544, 5.06205 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:07:31Z",
+          "avg_ns": 17019144942,
+          "stddev_ns": 3819055,
+          "avg_ts": 7.520942,
+          "stddev_ts": 0.001687,
+          "samples_ns": [
+            17022021015,
+            17020599019,
+            17014814793
+          ],
+          "samples_ts": [
+            7.51967,
+            7.5203,
+            7.52286
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:08:40Z",
+          "avg_ns": 25278019297,
+          "stddev_ns": 8495143,
+          "avg_ts": 5.063688,
+          "stddev_ts": 0.001702,
+          "samples_ns": [
+            25278573820,
+            25269261967,
+            25286222105
+          ],
+          "samples_ts": [
+            5.06358,
+            5.06544,
+            5.06205
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1252
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:16:13.560396+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:09:57Z\",\n    \"avg_ns\": 16999345821,\n    \"stddev_ns\": 13987394,\n    \"avg_ts\": 7.529705,\n    \"stddev_ts\": 0.006197,\n    \"samples_ns\": [ 17011650280, 16984132829, 17002254354 ],\n    \"samples_ts\": [ 7.52426, 7.53645, 7.52841 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:11:05Z\",\n    \"avg_ns\": 102655033582,\n    \"stddev_ns\": 817609732,\n    \"avg_ts\": 4.987788,\n    \"stddev_ts\": 0.039544,\n    \"samples_ns\": [ 102180673721, 102185302974, 103599124053 ],\n    \"samples_ts\": [ 5.01073, 5.01051, 4.94213 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:09:57Z",
+          "avg_ns": 16999345821,
+          "stddev_ns": 13987394,
+          "avg_ts": 7.529705,
+          "stddev_ts": 0.006197,
+          "samples_ns": [
+            17011650280,
+            16984132829,
+            17002254354
+          ],
+          "samples_ts": [
+            7.52426,
+            7.53645,
+            7.52841
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:11:05Z",
+          "avg_ns": 102655033582,
+          "stddev_ns": 817609732,
+          "avg_ts": 4.987788,
+          "stddev_ts": 0.039544,
+          "samples_ns": [
+            102180673721,
+            102185302974,
+            103599124053
+          ],
+          "samples_ts": [
+            5.01073,
+            5.01051,
+            4.94213
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1253
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:22:04.585317+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:16:14Z\",\n    \"avg_ns\": 68449480541,\n    \"stddev_ns\": 8132171,\n    \"avg_ts\": 7.479969,\n    \"stddev_ts\": 0.000889,\n    \"samples_ns\": [ 68442011517, 68458143792, 68448286314 ],\n    \"samples_ts\": [ 7.48079, 7.47902, 7.4801 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:20:48Z\",\n    \"avg_ns\": 25257771980,\n    \"stddev_ns\": 9971856,\n    \"avg_ts\": 5.067748,\n    \"stddev_ts\": 0.002000,\n    \"samples_ns\": [ 25252535558, 25269269807, 25251510576 ],\n    \"samples_ts\": [ 5.0688, 5.06544, 5.069 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:16:14Z",
+          "avg_ns": 68449480541,
+          "stddev_ns": 8132171,
+          "avg_ts": 7.479969,
+          "stddev_ts": 0.000889,
+          "samples_ns": [
+            68442011517,
+            68458143792,
+            68448286314
+          ],
+          "samples_ts": [
+            7.48079,
+            7.47902,
+            7.4801
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:20:48Z",
+          "avg_ns": 25257771980,
+          "stddev_ns": 9971856,
+          "avg_ts": 5.067748,
+          "stddev_ts": 0.002,
+          "samples_ns": [
+            25252535558,
+            25269269807,
+            25251510576
+          ],
+          "samples_ts": [
+            5.0688,
+            5.06544,
+            5.069
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1254
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:31:46.393443+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:22:05Z\",\n    \"avg_ns\": 68439663775,\n    \"stddev_ns\": 26454402,\n    \"avg_ts\": 7.481043,\n    \"stddev_ts\": 0.002892,\n    \"samples_ns\": [ 68455661787, 68454201031, 68409128507 ],\n    \"samples_ts\": [ 7.47929, 7.47945, 7.48438 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:26:39Z\",\n    \"avg_ns\": 102191821829,\n    \"stddev_ns\": 42591213,\n    \"avg_ts\": 5.010186,\n    \"stddev_ts\": 0.002088,\n    \"samples_ns\": [ 102239912470, 102176685887, 102158867131 ],\n    \"samples_ts\": [ 5.00783, 5.01093, 5.0118 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:22:05Z",
+          "avg_ns": 68439663775,
+          "stddev_ns": 26454402,
+          "avg_ts": 7.481043,
+          "stddev_ts": 0.002892,
+          "samples_ns": [
+            68455661787,
+            68454201031,
+            68409128507
+          ],
+          "samples_ts": [
+            7.47929,
+            7.47945,
+            7.48438
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:26:39Z",
+          "avg_ns": 102191821829,
+          "stddev_ns": 42591213,
+          "avg_ts": 5.010186,
+          "stddev_ts": 0.002088,
+          "samples_ns": [
+            102239912470,
+            102176685887,
+            102158867131
+          ],
+          "samples_ts": [
+            5.00783,
+            5.01093,
+            5.0118
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1255
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:34:11.679828+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:31:47Z\",\n    \"avg_ns\": 17010361047,\n    \"stddev_ns\": 4861293,\n    \"avg_ts\": 7.524826,\n    \"stddev_ts\": 0.002149,\n    \"samples_ns\": [ 17012708090, 17004775418, 17013599635 ],\n    \"samples_ts\": [ 7.52379, 7.5273, 7.52339 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:32:55Z\",\n    \"avg_ns\": 25262339438,\n    \"stddev_ns\": 10699900,\n    \"avg_ts\": 5.066831,\n    \"stddev_ts\": 0.002145,\n    \"samples_ns\": [ 25274551790, 25254618808, 25257847717 ],\n    \"samples_ts\": [ 5.06438, 5.06838, 5.06773 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:31:47Z",
+          "avg_ns": 17010361047,
+          "stddev_ns": 4861293,
+          "avg_ts": 7.524826,
+          "stddev_ts": 0.002149,
+          "samples_ns": [
+            17012708090,
+            17004775418,
+            17013599635
+          ],
+          "samples_ts": [
+            7.52379,
+            7.5273,
+            7.52339
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:32:55Z",
+          "avg_ns": 25262339438,
+          "stddev_ns": 10699900,
+          "avg_ts": 5.066831,
+          "stddev_ts": 0.002145,
+          "samples_ns": [
+            25274551790,
+            25254618808,
+            25257847717
+          ],
+          "samples_ts": [
+            5.06438,
+            5.06838,
+            5.06773
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1256
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:40:29.307634+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:34:12Z\",\n    \"avg_ns\": 17017951864,\n    \"stddev_ns\": 2900637,\n    \"avg_ts\": 7.521469,\n    \"stddev_ts\": 0.001282,\n    \"samples_ns\": [ 17018428394, 17014842469, 17020584729 ],\n    \"samples_ts\": [ 7.52126, 7.52284, 7.52031 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:35:20Z\",\n    \"avg_ns\": 102701350412,\n    \"stddev_ns\": 708948310,\n    \"avg_ts\": 4.985487,\n    \"stddev_ts\": 0.034279,\n    \"samples_ns\": [ 102321862398, 102262923156, 103519265684 ],\n    \"samples_ts\": [ 5.00382, 5.0067, 4.94594 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:34:12Z",
+          "avg_ns": 17017951864,
+          "stddev_ns": 2900637,
+          "avg_ts": 7.521469,
+          "stddev_ts": 0.001282,
+          "samples_ns": [
+            17018428394,
+            17014842469,
+            17020584729
+          ],
+          "samples_ts": [
+            7.52126,
+            7.52284,
+            7.52031
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:35:20Z",
+          "avg_ns": 102701350412,
+          "stddev_ns": 708948310,
+          "avg_ts": 4.985487,
+          "stddev_ts": 0.034279,
+          "samples_ns": [
+            102321862398,
+            102262923156,
+            103519265684
+          ],
+          "samples_ts": [
+            5.00382,
+            5.0067,
+            4.94594
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1257
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:46:22.378302+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:40:30Z\",\n    \"avg_ns\": 68960255667,\n    \"stddev_ns\": 3331976,\n    \"avg_ts\": 7.424566,\n    \"stddev_ts\": 0.000358,\n    \"samples_ns\": [ 68964089472, 68958240975, 68958436555 ],\n    \"samples_ts\": [ 7.42415, 7.42478, 7.42476 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:45:06Z\",\n    \"avg_ns\": 25249038040,\n    \"stddev_ns\": 6395808,\n    \"avg_ts\": 5.069500,\n    \"stddev_ts\": 0.001283,\n    \"samples_ns\": [ 25255950701, 25247821625, 25243341796 ],\n    \"samples_ts\": [ 5.06811, 5.06974, 5.07064 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:40:30Z",
+          "avg_ns": 68960255667,
+          "stddev_ns": 3331976,
+          "avg_ts": 7.424566,
+          "stddev_ts": 0.000358,
+          "samples_ns": [
+            68964089472,
+            68958240975,
+            68958436555
+          ],
+          "samples_ts": [
+            7.42415,
+            7.42478,
+            7.42476
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:45:06Z",
+          "avg_ns": 25249038040,
+          "stddev_ns": 6395808,
+          "avg_ts": 5.0695,
+          "stddev_ts": 0.001283,
+          "samples_ns": [
+            25255950701,
+            25247821625,
+            25243341796
+          ],
+          "samples_ts": [
+            5.06811,
+            5.06974,
+            5.07064
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1258
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:56:06.972387+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:46:23Z\",\n    \"avg_ns\": 68969073064,\n    \"stddev_ns\": 5277191,\n    \"avg_ts\": 7.423617,\n    \"stddev_ts\": 0.000568,\n    \"samples_ns\": [ 68974558025, 68964031637, 68968629530 ],\n    \"samples_ts\": [ 7.42303, 7.42416, 7.42366 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:50:59Z\",\n    \"avg_ns\": 102418827883,\n    \"stddev_ns\": 234879902,\n    \"avg_ts\": 4.999098,\n    \"stddev_ts\": 0.011450,\n    \"samples_ns\": [ 102272496712, 102294233874, 102689753064 ],\n    \"samples_ts\": [ 5.00623, 5.00517, 4.98589 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:46:23Z",
+          "avg_ns": 68969073064,
+          "stddev_ns": 5277191,
+          "avg_ts": 7.423617,
+          "stddev_ts": 0.000568,
+          "samples_ns": [
+            68974558025,
+            68964031637,
+            68968629530
+          ],
+          "samples_ts": [
+            7.42303,
+            7.42416,
+            7.42366
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:50:59Z",
+          "avg_ns": 102418827883,
+          "stddev_ns": 234879902,
+          "avg_ts": 4.999098,
+          "stddev_ts": 0.01145,
+          "samples_ns": [
+            102272496712,
+            102294233874,
+            102689753064
+          ],
+          "samples_ts": [
+            5.00623,
+            5.00517,
+            4.98589
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1259
+    },
+    {
+      "timestamp_utc": "2025-12-11T21:58:09.205140+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:56:08Z\",\n    \"avg_ns\": 12924695688,\n    \"stddev_ns\": 10762586,\n    \"avg_ts\": 9.903526,\n    \"stddev_ts\": 0.008245,\n    \"samples_ns\": [ 12936295835, 12915034175, 12922757054 ],\n    \"samples_ts\": [ 9.89464, 9.91093, 9.90501 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:56:59Z\",\n    \"avg_ns\": 23038240806,\n    \"stddev_ns\": 58435865,\n    \"avg_ts\": 5.556004,\n    \"stddev_ts\": 0.014112,\n    \"samples_ns\": [ 22971196640, 23078361156, 23065164623 ],\n    \"samples_ts\": [ 5.5722, 5.54632, 5.54949 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:56:08Z",
+          "avg_ns": 12924695688,
+          "stddev_ns": 10762586,
+          "avg_ts": 9.903526,
+          "stddev_ts": 0.008245,
+          "samples_ns": [
+            12936295835,
+            12915034175,
+            12922757054
+          ],
+          "samples_ts": [
+            9.89464,
+            9.91093,
+            9.90501
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:56:59Z",
+          "avg_ns": 23038240806,
+          "stddev_ns": 58435865,
+          "avg_ts": 5.556004,
+          "stddev_ts": 0.014112,
+          "samples_ns": [
+            22971196640,
+            23078361156,
+            23065164623
+          ],
+          "samples_ts": [
+            5.5722,
+            5.54632,
+            5.54949
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1260
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:03:43.687675+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:58:10Z\",\n    \"avg_ns\": 12921741804,\n    \"stddev_ns\": 6510995,\n    \"avg_ts\": 9.905787,\n    \"stddev_ts\": 0.004991,\n    \"samples_ns\": [ 12923677095, 12927063691, 12914484628 ],\n    \"samples_ts\": [ 9.9043, 9.90171, 9.91135 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T21:59:02Z\",\n    \"avg_ns\": 93779955011,\n    \"stddev_ns\": 65894277,\n    \"avg_ts\": 5.459591,\n    \"stddev_ts\": 0.003837,\n    \"samples_ns\": [ 93796611217, 93707330844, 93835922972 ],\n    \"samples_ts\": [ 5.45862, 5.46382, 5.45633 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:58:10Z",
+          "avg_ns": 12921741804,
+          "stddev_ns": 6510995,
+          "avg_ts": 9.905787,
+          "stddev_ts": 0.004991,
+          "samples_ns": [
+            12923677095,
+            12927063691,
+            12914484628
+          ],
+          "samples_ts": [
+            9.9043,
+            9.90171,
+            9.91135
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T21:59:02Z",
+          "avg_ns": 93779955011,
+          "stddev_ns": 65894277,
+          "avg_ts": 5.459591,
+          "stddev_ts": 0.003837,
+          "samples_ns": [
+            93796611217,
+            93707330844,
+            93835922972
+          ],
+          "samples_ts": [
+            5.45862,
+            5.46382,
+            5.45633
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1261
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:08:21.734080+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:03:44Z\",\n    \"avg_ns\": 51806090249,\n    \"stddev_ns\": 22305441,\n    \"avg_ts\": 9.883009,\n    \"stddev_ts\": 0.004254,\n    \"samples_ns\": [ 51830540302, 51786856150, 51800874296 ],\n    \"samples_ts\": [ 9.87835, 9.88668, 9.884 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:07:12Z\",\n    \"avg_ns\": 23130216386,\n    \"stddev_ns\": 53052811,\n    \"avg_ts\": 5.533906,\n    \"stddev_ts\": 0.012710,\n    \"samples_ns\": [ 23069003269, 23158756484, 23162889407 ],\n    \"samples_ts\": [ 5.54857, 5.52707, 5.52608 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:03:44Z",
+          "avg_ns": 51806090249,
+          "stddev_ns": 22305441,
+          "avg_ts": 9.883009,
+          "stddev_ts": 0.004254,
+          "samples_ns": [
+            51830540302,
+            51786856150,
+            51800874296
+          ],
+          "samples_ts": [
+            9.87835,
+            9.88668,
+            9.884
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:07:12Z",
+          "avg_ns": 23130216386,
+          "stddev_ns": 53052811,
+          "avg_ts": 5.533906,
+          "stddev_ts": 0.01271,
+          "samples_ns": [
+            23069003269,
+            23158756484,
+            23162889407
+          ],
+          "samples_ts": [
+            5.54857,
+            5.52707,
+            5.52608
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1262
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:16:31.455457+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:08:22Z\",\n    \"avg_ns\": 51829222141,\n    \"stddev_ns\": 13750735,\n    \"avg_ts\": 9.878597,\n    \"stddev_ts\": 0.002620,\n    \"samples_ns\": [ 51843838649, 51816552232, 51827275544 ],\n    \"samples_ts\": [ 9.87581, 9.88101, 9.87897 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:11:50Z\",\n    \"avg_ns\": 93661747509,\n    \"stddev_ns\": 68298392,\n    \"avg_ts\": 5.466481,\n    \"stddev_ts\": 0.003987,\n    \"samples_ns\": [ 93585504938, 93717326910, 93682410681 ],\n    \"samples_ts\": [ 5.47093, 5.46324, 5.46527 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:08:22Z",
+          "avg_ns": 51829222141,
+          "stddev_ns": 13750735,
+          "avg_ts": 9.878597,
+          "stddev_ts": 0.00262,
+          "samples_ns": [
+            51843838649,
+            51816552232,
+            51827275544
+          ],
+          "samples_ts": [
+            9.87581,
+            9.88101,
+            9.87897
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:11:50Z",
+          "avg_ns": 93661747509,
+          "stddev_ns": 68298392,
+          "avg_ts": 5.466481,
+          "stddev_ts": 0.003987,
+          "samples_ns": [
+            93585504938,
+            93717326910,
+            93682410681
+          ],
+          "samples_ts": [
+            5.47093,
+            5.46324,
+            5.46527
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1263
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:18:33.747209+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:16:32Z\",\n    \"avg_ns\": 12927932017,\n    \"stddev_ns\": 9770752,\n    \"avg_ts\": 9.901046,\n    \"stddev_ts\": 0.007479,\n    \"samples_ns\": [ 12923423634, 12921230175, 12939142243 ],\n    \"samples_ts\": [ 9.9045, 9.90618, 9.89246 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:17:24Z\",\n    \"avg_ns\": 23050065821,\n    \"stddev_ns\": 39375965,\n    \"avg_ts\": 5.553140,\n    \"stddev_ts\": 0.009494,\n    \"samples_ns\": [ 23005510898, 23064496971, 23080189596 ],\n    \"samples_ts\": [ 5.56388, 5.54965, 5.54588 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:16:32Z",
+          "avg_ns": 12927932017,
+          "stddev_ns": 9770752,
+          "avg_ts": 9.901046,
+          "stddev_ts": 0.007479,
+          "samples_ns": [
+            12923423634,
+            12921230175,
+            12939142243
+          ],
+          "samples_ts": [
+            9.9045,
+            9.90618,
+            9.89246
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:17:24Z",
+          "avg_ns": 23050065821,
+          "stddev_ns": 39375965,
+          "avg_ts": 5.55314,
+          "stddev_ts": 0.009494,
+          "samples_ns": [
+            23005510898,
+            23064496971,
+            23080189596
+          ],
+          "samples_ts": [
+            5.56388,
+            5.54965,
+            5.54588
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1264
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:24:07.736370+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:18:34Z\",\n    \"avg_ns\": 12920892753,\n    \"stddev_ns\": 6287031,\n    \"avg_ts\": 9.906438,\n    \"stddev_ts\": 0.004820,\n    \"samples_ns\": [ 12924986084, 12913656252, 12924035925 ],\n    \"samples_ts\": [ 9.9033, 9.91199, 9.90403 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:19:26Z\",\n    \"avg_ns\": 93615694430,\n    \"stddev_ns\": 40861437,\n    \"avg_ts\": 5.469169,\n    \"stddev_ts\": 0.002387,\n    \"samples_ns\": [ 93591618438, 93592592372, 93662872481 ],\n    \"samples_ts\": [ 5.47058, 5.47052, 5.46641 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:18:34Z",
+          "avg_ns": 12920892753,
+          "stddev_ns": 6287031,
+          "avg_ts": 9.906438,
+          "stddev_ts": 0.00482,
+          "samples_ns": [
+            12924986084,
+            12913656252,
+            12924035925
+          ],
+          "samples_ts": [
+            9.9033,
+            9.91199,
+            9.90403
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:19:26Z",
+          "avg_ns": 93615694430,
+          "stddev_ns": 40861437,
+          "avg_ts": 5.469169,
+          "stddev_ts": 0.002387,
+          "samples_ns": [
+            93591618438,
+            93592592372,
+            93662872481
+          ],
+          "samples_ts": [
+            5.47058,
+            5.47052,
+            5.46641
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1265
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:28:46.400337+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:24:08Z\",\n    \"avg_ns\": 52024722800,\n    \"stddev_ns\": 12235991,\n    \"avg_ts\": 9.841475,\n    \"stddev_ts\": 0.002315,\n    \"samples_ns\": [ 52010783710, 52029706448, 52033678243 ],\n    \"samples_ts\": [ 9.84411, 9.84053, 9.83978 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:27:37Z\",\n    \"avg_ns\": 23036975987,\n    \"stddev_ns\": 122268671,\n    \"avg_ts\": 5.556389,\n    \"stddev_ts\": 0.029581,\n    \"samples_ns\": [ 22895863435, 23103654035, 23111410492 ],\n    \"samples_ts\": [ 5.59053, 5.54025, 5.53839 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:24:08Z",
+          "avg_ns": 52024722800,
+          "stddev_ns": 12235991,
+          "avg_ts": 9.841475,
+          "stddev_ts": 0.002315,
+          "samples_ns": [
+            52010783710,
+            52029706448,
+            52033678243
+          ],
+          "samples_ts": [
+            9.84411,
+            9.84053,
+            9.83978
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:27:37Z",
+          "avg_ns": 23036975987,
+          "stddev_ns": 122268671,
+          "avg_ts": 5.556389,
+          "stddev_ts": 0.029581,
+          "samples_ns": [
+            22895863435,
+            23103654035,
+            23111410492
+          ],
+          "samples_ts": [
+            5.59053,
+            5.54025,
+            5.53839
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1266
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:36:57.297223+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:28:47Z\",\n    \"avg_ns\": 52034233847,\n    \"stddev_ns\": 26144456,\n    \"avg_ts\": 9.839678,\n    \"stddev_ts\": 0.004942,\n    \"samples_ns\": [ 52064178105, 52015947106, 52022576331 ],\n    \"samples_ts\": [ 9.83402, 9.84314, 9.84188 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:32:15Z\",\n    \"avg_ns\": 93766520805,\n    \"stddev_ns\": 59059935,\n    \"avg_ts\": 5.460373,\n    \"stddev_ts\": 0.003438,\n    \"samples_ns\": [ 93740031347, 93725344768, 93834186302 ],\n    \"samples_ts\": [ 5.46191, 5.46277, 5.45643 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:28:47Z",
+          "avg_ns": 52034233847,
+          "stddev_ns": 26144456,
+          "avg_ts": 9.839678,
+          "stddev_ts": 0.004942,
+          "samples_ns": [
+            52064178105,
+            52015947106,
+            52022576331
+          ],
+          "samples_ts": [
+            9.83402,
+            9.84314,
+            9.84188
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:32:15Z",
+          "avg_ns": 93766520805,
+          "stddev_ns": 59059935,
+          "avg_ts": 5.460373,
+          "stddev_ts": 0.003438,
+          "samples_ns": [
+            93740031347,
+            93725344768,
+            93834186302
+          ],
+          "samples_ts": [
+            5.46191,
+            5.46277,
+            5.45643
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1267
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:38:59.539496+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:36:58Z\",\n    \"avg_ns\": 12918558177,\n    \"stddev_ns\": 7192802,\n    \"avg_ts\": 9.908229,\n    \"stddev_ts\": 0.005515,\n    \"samples_ns\": [ 12926214083, 12917514270, 12911946180 ],\n    \"samples_ts\": [ 9.90236, 9.90903, 9.9133 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:37:50Z\",\n    \"avg_ns\": 23044248318,\n    \"stddev_ns\": 46528010,\n    \"avg_ts\": 5.554546,\n    \"stddev_ts\": 0.011219,\n    \"samples_ns\": [ 22995387203, 23088025098, 23049332654 ],\n    \"samples_ts\": [ 5.56633, 5.544, 5.55331 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:36:58Z",
+          "avg_ns": 12918558177,
+          "stddev_ns": 7192802,
+          "avg_ts": 9.908229,
+          "stddev_ts": 0.005515,
+          "samples_ns": [
+            12926214083,
+            12917514270,
+            12911946180
+          ],
+          "samples_ts": [
+            9.90236,
+            9.90903,
+            9.9133
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:37:50Z",
+          "avg_ns": 23044248318,
+          "stddev_ns": 46528010,
+          "avg_ts": 5.554546,
+          "stddev_ts": 0.011219,
+          "samples_ns": [
+            22995387203,
+            23088025098,
+            23049332654
+          ],
+          "samples_ts": [
+            5.56633,
+            5.544,
+            5.55331
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1268
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:44:33.796462+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:39:00Z\",\n    \"avg_ns\": 12921536672,\n    \"stddev_ns\": 4288045,\n    \"avg_ts\": 9.905943,\n    \"stddev_ts\": 0.003285,\n    \"samples_ns\": [ 12916747659, 12922853913, 12925008446 ],\n    \"samples_ts\": [ 9.90962, 9.90493, 9.90328 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:39:52Z\",\n    \"avg_ns\": 93701171287,\n    \"stddev_ns\": 41252890,\n    \"avg_ts\": 5.464180,\n    \"stddev_ts\": 0.002405,\n    \"samples_ns\": [ 93669163014, 93686625299, 93747725549 ],\n    \"samples_ts\": [ 5.46605, 5.46503, 5.46147 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:39:00Z",
+          "avg_ns": 12921536672,
+          "stddev_ns": 4288045,
+          "avg_ts": 9.905943,
+          "stddev_ts": 0.003285,
+          "samples_ns": [
+            12916747659,
+            12922853913,
+            12925008446
+          ],
+          "samples_ts": [
+            9.90962,
+            9.90493,
+            9.90328
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:39:52Z",
+          "avg_ns": 93701171287,
+          "stddev_ns": 41252890,
+          "avg_ts": 5.46418,
+          "stddev_ts": 0.002405,
+          "samples_ns": [
+            93669163014,
+            93686625299,
+            93747725549
+          ],
+          "samples_ts": [
+            5.46605,
+            5.46503,
+            5.46147
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1269
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:49:14.001022+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:44:34Z\",\n    \"avg_ns\": 52443491633,\n    \"stddev_ns\": 12842860,\n    \"avg_ts\": 9.762890,\n    \"stddev_ts\": 0.002391,\n    \"samples_ns\": [ 52453666864, 52447746799, 52429061236 ],\n    \"samples_ts\": [ 9.761, 9.7621, 9.76558 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:48:04Z\",\n    \"avg_ns\": 22997594039,\n    \"stddev_ns\": 69619933,\n    \"avg_ts\": 5.565834,\n    \"stddev_ts\": 0.016869,\n    \"samples_ns\": [ 22920401127, 23016752652, 23055628340 ],\n    \"samples_ts\": [ 5.58454, 5.56117, 5.55179 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:44:34Z",
+          "avg_ns": 52443491633,
+          "stddev_ns": 12842860,
+          "avg_ts": 9.76289,
+          "stddev_ts": 0.002391,
+          "samples_ns": [
+            52453666864,
+            52447746799,
+            52429061236
+          ],
+          "samples_ts": [
+            9.761,
+            9.7621,
+            9.76558
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:48:04Z",
+          "avg_ns": 22997594039,
+          "stddev_ns": 69619933,
+          "avg_ts": 5.565834,
+          "stddev_ts": 0.016869,
+          "samples_ns": [
+            22920401127,
+            23016752652,
+            23055628340
+          ],
+          "samples_ts": [
+            5.58454,
+            5.56117,
+            5.55179
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1270
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:57:25.392508+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:49:15Z\",\n    \"avg_ns\": 52424787062,\n    \"stddev_ns\": 8968956,\n    \"avg_ts\": 9.766373,\n    \"stddev_ts\": 0.001670,\n    \"samples_ns\": [ 52424386989, 52433946436, 52416027762 ],\n    \"samples_ts\": [ 9.76645, 9.76467, 9.768 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:52:44Z\",\n    \"avg_ns\": 93407669527,\n    \"stddev_ns\": 149570025,\n    \"avg_ts\": 5.481358,\n    \"stddev_ts\": 0.008783,\n    \"samples_ns\": [ 93240629438, 93529194821, 93453184322 ],\n    \"samples_ts\": [ 5.49117, 5.47423, 5.47868 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:49:15Z",
+          "avg_ns": 52424787062,
+          "stddev_ns": 8968956,
+          "avg_ts": 9.766373,
+          "stddev_ts": 0.00167,
+          "samples_ns": [
+            52424386989,
+            52433946436,
+            52416027762
+          ],
+          "samples_ts": [
+            9.76645,
+            9.76467,
+            9.768
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:52:44Z",
+          "avg_ns": 93407669527,
+          "stddev_ns": 149570025,
+          "avg_ts": 5.481358,
+          "stddev_ts": 0.008783,
+          "samples_ns": [
+            93240629438,
+            93529194821,
+            93453184322
+          ],
+          "samples_ts": [
+            5.49117,
+            5.47423,
+            5.47868
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1271
+    },
+    {
+      "timestamp_utc": "2025-12-11T22:59:27.684345+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:57:26Z\",\n    \"avg_ns\": 12922456459,\n    \"stddev_ns\": 3299055,\n    \"avg_ts\": 9.905238,\n    \"stddev_ts\": 0.002527,\n    \"samples_ns\": [ 12918989791, 12922826892, 12925552695 ],\n    \"samples_ts\": [ 9.9079, 9.90495, 9.90286 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:58:18Z\",\n    \"avg_ns\": 23059445470,\n    \"stddev_ns\": 13151816,\n    \"avg_ts\": 5.550872,\n    \"stddev_ts\": 0.003165,\n    \"samples_ns\": [ 23049598546, 23074379063, 23054358803 ],\n    \"samples_ts\": [ 5.55324, 5.54728, 5.5521 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:57:26Z",
+          "avg_ns": 12922456459,
+          "stddev_ns": 3299055,
+          "avg_ts": 9.905238,
+          "stddev_ts": 0.002527,
+          "samples_ns": [
+            12918989791,
+            12922826892,
+            12925552695
+          ],
+          "samples_ts": [
+            9.9079,
+            9.90495,
+            9.90286
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:58:18Z",
+          "avg_ns": 23059445470,
+          "stddev_ns": 13151816,
+          "avg_ts": 5.550872,
+          "stddev_ts": 0.003165,
+          "samples_ns": [
+            23049598546,
+            23074379063,
+            23054358803
+          ],
+          "samples_ts": [
+            5.55324,
+            5.54728,
+            5.5521
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1272
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:05:01.543825+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T22:59:28Z\",\n    \"avg_ns\": 12915229276,\n    \"stddev_ns\": 7861315,\n    \"avg_ts\": 9.910783,\n    \"stddev_ts\": 0.006033,\n    \"samples_ns\": [ 12922570138, 12916181742, 12906935949 ],\n    \"samples_ts\": [ 9.90515, 9.91005, 9.91715 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:00:20Z\",\n    \"avg_ns\": 93587346393,\n    \"stddev_ns\": 38519023,\n    \"avg_ts\": 5.470826,\n    \"stddev_ts\": 0.002252,\n    \"samples_ns\": [ 93584124829, 93550540542, 93627373809 ],\n    \"samples_ts\": [ 5.47101, 5.47298, 5.46849 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T22:59:28Z",
+          "avg_ns": 12915229276,
+          "stddev_ns": 7861315,
+          "avg_ts": 9.910783,
+          "stddev_ts": 0.006033,
+          "samples_ns": [
+            12922570138,
+            12916181742,
+            12906935949
+          ],
+          "samples_ts": [
+            9.90515,
+            9.91005,
+            9.91715
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:00:20Z",
+          "avg_ns": 93587346393,
+          "stddev_ns": 38519023,
+          "avg_ts": 5.470826,
+          "stddev_ts": 0.002252,
+          "samples_ns": [
+            93584124829,
+            93550540542,
+            93627373809
+          ],
+          "samples_ts": [
+            5.47101,
+            5.47298,
+            5.46849
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1273
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:09:39.169771+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:05:02Z\",\n    \"avg_ns\": 51778719238,\n    \"stddev_ns\": 15490017,\n    \"avg_ts\": 9.888233,\n    \"stddev_ts\": 0.002957,\n    \"samples_ns\": [ 51771556227, 51796490724, 51768110765 ],\n    \"samples_ts\": [ 9.8896, 9.88484, 9.89026 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:08:29Z\",\n    \"avg_ns\": 23023335194,\n    \"stddev_ns\": 28480974,\n    \"avg_ts\": 5.559582,\n    \"stddev_ts\": 0.006882,\n    \"samples_ns\": [ 22990539954, 23041853443, 23037612186 ],\n    \"samples_ts\": [ 5.56751, 5.55511, 5.55613 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:05:02Z",
+          "avg_ns": 51778719238,
+          "stddev_ns": 15490017,
+          "avg_ts": 9.888233,
+          "stddev_ts": 0.002957,
+          "samples_ns": [
+            51771556227,
+            51796490724,
+            51768110765
+          ],
+          "samples_ts": [
+            9.8896,
+            9.88484,
+            9.89026
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:08:29Z",
+          "avg_ns": 23023335194,
+          "stddev_ns": 28480974,
+          "avg_ts": 5.559582,
+          "stddev_ts": 0.006882,
+          "samples_ns": [
+            22990539954,
+            23041853443,
+            23037612186
+          ],
+          "samples_ts": [
+            5.56751,
+            5.55511,
+            5.55613
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1274
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:17:48.096892+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:09:40Z\",\n    \"avg_ns\": 51774613588,\n    \"stddev_ns\": 3609448,\n    \"avg_ts\": 9.889016,\n    \"stddev_ts\": 0.000688,\n    \"samples_ns\": [ 51775497392, 51777691702, 51770651671 ],\n    \"samples_ts\": [ 9.88885, 9.88843, 9.88977 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:13:07Z\",\n    \"avg_ns\": 93448746505,\n    \"stddev_ns\": 77270887,\n    \"avg_ts\": 5.478942,\n    \"stddev_ts\": 0.004533,\n    \"samples_ns\": [ 93359557302, 93495478294, 93491203921 ],\n    \"samples_ts\": [ 5.48417, 5.4762, 5.47645 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:09:40Z",
+          "avg_ns": 51774613588,
+          "stddev_ns": 3609448,
+          "avg_ts": 9.889016,
+          "stddev_ts": 0.000688,
+          "samples_ns": [
+            51775497392,
+            51777691702,
+            51770651671
+          ],
+          "samples_ts": [
+            9.88885,
+            9.88843,
+            9.88977
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:13:07Z",
+          "avg_ns": 93448746505,
+          "stddev_ns": 77270887,
+          "avg_ts": 5.478942,
+          "stddev_ts": 0.004533,
+          "samples_ns": [
+            93359557302,
+            93495478294,
+            93491203921
+          ],
+          "samples_ts": [
+            5.48417,
+            5.4762,
+            5.47645
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1275
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:19:50.221220+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:17:49Z\",\n    \"avg_ns\": 12911270230,\n    \"stddev_ns\": 3699205,\n    \"avg_ts\": 9.913820,\n    \"stddev_ts\": 0.002840,\n    \"samples_ns\": [ 12908840271, 12915527511, 12909442908 ],\n    \"samples_ts\": [ 9.91569, 9.91055, 9.91522 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:18:40Z\",\n    \"avg_ns\": 23020667975,\n    \"stddev_ns\": 29065842,\n    \"avg_ts\": 5.560227,\n    \"stddev_ts\": 0.007025,\n    \"samples_ns\": [ 22987558719, 23041982445, 23032462761 ],\n    \"samples_ts\": [ 5.56823, 5.55508, 5.55737 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:17:49Z",
+          "avg_ns": 12911270230,
+          "stddev_ns": 3699205,
+          "avg_ts": 9.91382,
+          "stddev_ts": 0.00284,
+          "samples_ns": [
+            12908840271,
+            12915527511,
+            12909442908
+          ],
+          "samples_ts": [
+            9.91569,
+            9.91055,
+            9.91522
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:18:40Z",
+          "avg_ns": 23020667975,
+          "stddev_ns": 29065842,
+          "avg_ts": 5.560227,
+          "stddev_ts": 0.007025,
+          "samples_ns": [
+            22987558719,
+            23041982445,
+            23032462761
+          ],
+          "samples_ts": [
+            5.56823,
+            5.55508,
+            5.55737
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1276
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:25:23.758550+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:19:51Z\",\n    \"avg_ns\": 12912258945,\n    \"stddev_ns\": 4006272,\n    \"avg_ts\": 9.913061,\n    \"stddev_ts\": 0.003075,\n    \"samples_ns\": [ 12915193024, 12913887176, 12907696636 ],\n    \"samples_ts\": [ 9.91081, 9.91181, 9.91656 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:20:43Z\",\n    \"avg_ns\": 93477694862,\n    \"stddev_ns\": 55686867,\n    \"avg_ts\": 5.477244,\n    \"stddev_ts\": 0.003263,\n    \"samples_ns\": [ 93474282165, 93535009594, 93423792827 ],\n    \"samples_ts\": [ 5.47744, 5.47389, 5.4804 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:19:51Z",
+          "avg_ns": 12912258945,
+          "stddev_ns": 4006272,
+          "avg_ts": 9.913061,
+          "stddev_ts": 0.003075,
+          "samples_ns": [
+            12915193024,
+            12913887176,
+            12907696636
+          ],
+          "samples_ts": [
+            9.91081,
+            9.91181,
+            9.91656
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:20:43Z",
+          "avg_ns": 93477694862,
+          "stddev_ns": 55686867,
+          "avg_ts": 5.477244,
+          "stddev_ts": 0.003263,
+          "samples_ns": [
+            93474282165,
+            93535009594,
+            93423792827
+          ],
+          "samples_ts": [
+            5.47744,
+            5.47389,
+            5.4804
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1277
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:30:02.486762+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:25:24Z\",\n    \"avg_ns\": 52001708037,\n    \"stddev_ns\": 6786771,\n    \"avg_ts\": 9.845831,\n    \"stddev_ts\": 0.001284,\n    \"samples_ns\": [ 52008667567, 52001331493, 51995125053 ],\n    \"samples_ts\": [ 9.84451, 9.8459, 9.84708 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:28:52Z\",\n    \"avg_ns\": 23092597261,\n    \"stddev_ns\": 12924833,\n    \"avg_ts\": 5.542903,\n    \"stddev_ts\": 0.003102,\n    \"samples_ns\": [ 23091686499, 23080151898, 23105953386 ],\n    \"samples_ts\": [ 5.54312, 5.54589, 5.5397 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:25:24Z",
+          "avg_ns": 52001708037,
+          "stddev_ns": 6786771,
+          "avg_ts": 9.845831,
+          "stddev_ts": 0.001284,
+          "samples_ns": [
+            52008667567,
+            52001331493,
+            51995125053
+          ],
+          "samples_ts": [
+            9.84451,
+            9.8459,
+            9.84708
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:28:52Z",
+          "avg_ns": 23092597261,
+          "stddev_ns": 12924833,
+          "avg_ts": 5.542903,
+          "stddev_ts": 0.003102,
+          "samples_ns": [
+            23091686499,
+            23080151898,
+            23105953386
+          ],
+          "samples_ts": [
+            5.54312,
+            5.54589,
+            5.5397
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1278
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:38:12.473730+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:30:03Z\",\n    \"avg_ns\": 52009210014,\n    \"stddev_ns\": 5506985,\n    \"avg_ts\": 9.844410,\n    \"stddev_ts\": 0.001041,\n    \"samples_ns\": [ 52013731346, 52003090457, 52010808241 ],\n    \"samples_ts\": [ 9.84355, 9.84557, 9.84411 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:33:31Z\",\n    \"avg_ns\": 93487464132,\n    \"stddev_ns\": 42216984,\n    \"avg_ts\": 5.476671,\n    \"stddev_ts\": 0.002473,\n    \"samples_ns\": [ 93448355220, 93481816534, 93532220642 ],\n    \"samples_ts\": [ 5.47896, 5.477, 5.47405 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:30:03Z",
+          "avg_ns": 52009210014,
+          "stddev_ns": 5506985,
+          "avg_ts": 9.84441,
+          "stddev_ts": 0.001041,
+          "samples_ns": [
+            52013731346,
+            52003090457,
+            52010808241
+          ],
+          "samples_ts": [
+            9.84355,
+            9.84557,
+            9.84411
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:33:31Z",
+          "avg_ns": 93487464132,
+          "stddev_ns": 42216984,
+          "avg_ts": 5.476671,
+          "stddev_ts": 0.002473,
+          "samples_ns": [
+            93448355220,
+            93481816534,
+            93532220642
+          ],
+          "samples_ts": [
+            5.47896,
+            5.477,
+            5.47405
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1279
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:40:14.672022+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:38:13Z\",\n    \"avg_ns\": 12906730462,\n    \"stddev_ns\": 1749252,\n    \"avg_ts\": 9.917306,\n    \"stddev_ts\": 0.001338,\n    \"samples_ns\": [ 12908577537, 12905117500, 12906496351 ],\n    \"samples_ts\": [ 9.91589, 9.91855, 9.91749 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:39:05Z\",\n    \"avg_ns\": 23044588517,\n    \"stddev_ns\": 35322006,\n    \"avg_ts\": 5.554458,\n    \"stddev_ts\": 0.008521,\n    \"samples_ns\": [ 23003914793, 23067539602, 23062311158 ],\n    \"samples_ts\": [ 5.56427, 5.54892, 5.55018 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:38:13Z",
+          "avg_ns": 12906730462,
+          "stddev_ns": 1749252,
+          "avg_ts": 9.917306,
+          "stddev_ts": 0.001338,
+          "samples_ns": [
+            12908577537,
+            12905117500,
+            12906496351
+          ],
+          "samples_ts": [
+            9.91589,
+            9.91855,
+            9.91749
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:39:05Z",
+          "avg_ns": 23044588517,
+          "stddev_ns": 35322006,
+          "avg_ts": 5.554458,
+          "stddev_ts": 0.008521,
+          "samples_ns": [
+            23003914793,
+            23067539602,
+            23062311158
+          ],
+          "samples_ts": [
+            5.56427,
+            5.54892,
+            5.55018
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1280
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:45:48.292960+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:40:15Z\",\n    \"avg_ns\": 12912138037,\n    \"stddev_ns\": 3251647,\n    \"avg_ts\": 9.913153,\n    \"stddev_ts\": 0.002493,\n    \"samples_ns\": [ 12911313272, 12915718576, 12909382265 ],\n    \"samples_ts\": [ 9.91379, 9.9104, 9.91527 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:41:07Z\",\n    \"avg_ns\": 93488096909,\n    \"stddev_ns\": 58016239,\n    \"avg_ts\": 5.476634,\n    \"stddev_ts\": 0.003400,\n    \"samples_ns\": [ 93422081643, 93511246274, 93530962812 ],\n    \"samples_ts\": [ 5.4805, 5.47528, 5.47412 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:40:15Z",
+          "avg_ns": 12912138037,
+          "stddev_ns": 3251647,
+          "avg_ts": 9.913153,
+          "stddev_ts": 0.002493,
+          "samples_ns": [
+            12911313272,
+            12915718576,
+            12909382265
+          ],
+          "samples_ts": [
+            9.91379,
+            9.9104,
+            9.91527
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:41:07Z",
+          "avg_ns": 93488096909,
+          "stddev_ns": 58016239,
+          "avg_ts": 5.476634,
+          "stddev_ts": 0.0034,
+          "samples_ns": [
+            93422081643,
+            93511246274,
+            93530962812
+          ],
+          "samples_ts": [
+            5.4805,
+            5.47528,
+            5.47412
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1281
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:50:28.347177+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:45:49Z\",\n    \"avg_ns\": 52425344094,\n    \"stddev_ns\": 6208984,\n    \"avg_ts\": 9.766269,\n    \"stddev_ts\": 0.001157,\n    \"samples_ns\": [ 52418254355, 52427965284, 52429812643 ],\n    \"samples_ts\": [ 9.76759, 9.76578, 9.76544 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:49:19Z\",\n    \"avg_ns\": 22959614102,\n    \"stddev_ns\": 160664203,\n    \"avg_ts\": 5.575189,\n    \"stddev_ts\": 0.039170,\n    \"samples_ns\": [ 22774345572, 23043904564, 23060592172 ],\n    \"samples_ts\": [ 5.62036, 5.55461, 5.55059 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:45:49Z",
+          "avg_ns": 52425344094,
+          "stddev_ns": 6208984,
+          "avg_ts": 9.766269,
+          "stddev_ts": 0.001157,
+          "samples_ns": [
+            52418254355,
+            52427965284,
+            52429812643
+          ],
+          "samples_ts": [
+            9.76759,
+            9.76578,
+            9.76544
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:49:19Z",
+          "avg_ns": 22959614102,
+          "stddev_ns": 160664203,
+          "avg_ts": 5.575189,
+          "stddev_ts": 0.03917,
+          "samples_ns": [
+            22774345572,
+            23043904564,
+            23060592172
+          ],
+          "samples_ts": [
+            5.62036,
+            5.55461,
+            5.55059
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1282
+    },
+    {
+      "timestamp_utc": "2025-12-11T23:58:39.439417+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:50:29Z\",\n    \"avg_ns\": 52410911796,\n    \"stddev_ns\": 3733453,\n    \"avg_ts\": 9.768958,\n    \"stddev_ts\": 0.000696,\n    \"samples_ns\": [ 52411031518, 52414583949, 52407119921 ],\n    \"samples_ts\": [ 9.76894, 9.76827, 9.76966 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:53:59Z\",\n    \"avg_ns\": 93320661471,\n    \"stddev_ns\": 51744120,\n    \"avg_ts\": 5.486460,\n    \"stddev_ts\": 0.003043,\n    \"samples_ns\": [ 93261238953, 93355765677, 93344979784 ],\n    \"samples_ts\": [ 5.48995, 5.4844, 5.48503 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:50:29Z",
+          "avg_ns": 52410911796,
+          "stddev_ns": 3733453,
+          "avg_ts": 9.768958,
+          "stddev_ts": 0.000696,
+          "samples_ns": [
+            52411031518,
+            52414583949,
+            52407119921
+          ],
+          "samples_ts": [
+            9.76894,
+            9.76827,
+            9.76966
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:53:59Z",
+          "avg_ns": 93320661471,
+          "stddev_ns": 51744120,
+          "avg_ts": 5.48646,
+          "stddev_ts": 0.003043,
+          "samples_ns": [
+            93261238953,
+            93355765677,
+            93344979784
+          ],
+          "samples_ts": [
+            5.48995,
+            5.4844,
+            5.48503
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1283
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:00:41.922339+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:58:40Z\",\n    \"avg_ns\": 12912498217,\n    \"stddev_ns\": 2739622,\n    \"avg_ts\": 9.912877,\n    \"stddev_ts\": 0.002100,\n    \"samples_ns\": [ 12912328643, 12909852046, 12915313964 ],\n    \"samples_ts\": [ 9.91301, 9.91491, 9.91072 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-11T23:59:32Z\",\n    \"avg_ns\": 23133574753,\n    \"stddev_ns\": 141184699,\n    \"avg_ts\": 5.533221,\n    \"stddev_ts\": 0.033882,\n    \"samples_ns\": [ 22971606464, 23230615812, 23198501983 ],\n    \"samples_ts\": [ 5.5721, 5.50997, 5.5176 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:58:40Z",
+          "avg_ns": 12912498217,
+          "stddev_ns": 2739622,
+          "avg_ts": 9.912877,
+          "stddev_ts": 0.0021,
+          "samples_ns": [
+            12912328643,
+            12909852046,
+            12915313964
+          ],
+          "samples_ts": [
+            9.91301,
+            9.91491,
+            9.91072
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-11T23:59:32Z",
+          "avg_ns": 23133574753,
+          "stddev_ns": 141184699,
+          "avg_ts": 5.533221,
+          "stddev_ts": 0.033882,
+          "samples_ns": [
+            22971606464,
+            23230615812,
+            23198501983
+          ],
+          "samples_ts": [
+            5.5721,
+            5.50997,
+            5.5176
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1284
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:06:15.150172+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:00:43Z\",\n    \"avg_ns\": 12908287217,\n    \"stddev_ns\": 3539307,\n    \"avg_ts\": 9.916111,\n    \"stddev_ts\": 0.002716,\n    \"samples_ns\": [ 12906444187, 12906053846, 12912363620 ],\n    \"samples_ts\": [ 9.91753, 9.91783, 9.91298 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:01:34Z\",\n    \"avg_ns\": 93356457802,\n    \"stddev_ns\": 87997164,\n    \"avg_ts\": 5.484359,\n    \"stddev_ts\": 0.005170,\n    \"samples_ns\": [ 93265923754, 93441674843, 93361774811 ],\n    \"samples_ts\": [ 5.48968, 5.47935, 5.48404 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:00:43Z",
+          "avg_ns": 12908287217,
+          "stddev_ns": 3539307,
+          "avg_ts": 9.916111,
+          "stddev_ts": 0.002716,
+          "samples_ns": [
+            12906444187,
+            12906053846,
+            12912363620
+          ],
+          "samples_ts": [
+            9.91753,
+            9.91783,
+            9.91298
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:01:34Z",
+          "avg_ns": 93356457802,
+          "stddev_ns": 87997164,
+          "avg_ts": 5.484359,
+          "stddev_ts": 0.00517,
+          "samples_ns": [
+            93265923754,
+            93441674843,
+            93361774811
+          ],
+          "samples_ts": [
+            5.48968,
+            5.47935,
+            5.48404
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1285
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:10:52.532473+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:06:16Z\",\n    \"avg_ns\": 51760556523,\n    \"stddev_ns\": 7142204,\n    \"avg_ts\": 9.891702,\n    \"stddev_ts\": 0.001364,\n    \"samples_ns\": [ 51765512482, 51763778231, 51752378858 ],\n    \"samples_ts\": [ 9.89075, 9.89109, 9.89327 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:09:43Z\",\n    \"avg_ns\": 22962740552,\n    \"stddev_ns\": 183153515,\n    \"avg_ts\": 5.574485,\n    \"stddev_ts\": 0.044668,\n    \"samples_ns\": [ 22751259819, 23069942090, 23067019747 ],\n    \"samples_ts\": [ 5.62606, 5.54835, 5.54905 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:06:16Z",
+          "avg_ns": 51760556523,
+          "stddev_ns": 7142204,
+          "avg_ts": 9.891702,
+          "stddev_ts": 0.001364,
+          "samples_ns": [
+            51765512482,
+            51763778231,
+            51752378858
+          ],
+          "samples_ts": [
+            9.89075,
+            9.89109,
+            9.89327
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:09:43Z",
+          "avg_ns": 22962740552,
+          "stddev_ns": 183153515,
+          "avg_ts": 5.574485,
+          "stddev_ts": 0.044668,
+          "samples_ns": [
+            22751259819,
+            23069942090,
+            23067019747
+          ],
+          "samples_ts": [
+            5.62606,
+            5.54835,
+            5.54905
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1286
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:19:01.187108+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:10:53Z\",\n    \"avg_ns\": 51772636114,\n    \"stddev_ns\": 8040199,\n    \"avg_ts\": 9.889394,\n    \"stddev_ts\": 0.001535,\n    \"samples_ns\": [ 51771728192, 51781088500, 51765091651 ],\n    \"samples_ts\": [ 9.88957, 9.88778, 9.89084 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:14:20Z\",\n    \"avg_ns\": 93378477082,\n    \"stddev_ns\": 177282177,\n    \"avg_ts\": 5.483075,\n    \"stddev_ts\": 0.010419,\n    \"samples_ns\": [ 93178204593, 93515317115, 93441909538 ],\n    \"samples_ts\": [ 5.49485, 5.47504, 5.47934 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:10:53Z",
+          "avg_ns": 51772636114,
+          "stddev_ns": 8040199,
+          "avg_ts": 9.889394,
+          "stddev_ts": 0.001535,
+          "samples_ns": [
+            51771728192,
+            51781088500,
+            51765091651
+          ],
+          "samples_ts": [
+            9.88957,
+            9.88778,
+            9.89084
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:14:20Z",
+          "avg_ns": 93378477082,
+          "stddev_ns": 177282177,
+          "avg_ts": 5.483075,
+          "stddev_ts": 0.010419,
+          "samples_ns": [
+            93178204593,
+            93515317115,
+            93441909538
+          ],
+          "samples_ts": [
+            5.49485,
+            5.47504,
+            5.47934
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1287
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:21:03.388860+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:19:02Z\",\n    \"avg_ns\": 12910479338,\n    \"stddev_ns\": 927424,\n    \"avg_ts\": 9.914427,\n    \"stddev_ts\": 0.000707,\n    \"samples_ns\": [ 12910357953, 12911454446, 12909625616 ],\n    \"samples_ts\": [ 9.91452, 9.91368, 9.91508 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:19:53Z\",\n    \"avg_ns\": 23041193366,\n    \"stddev_ns\": 68847071,\n    \"avg_ts\": 5.555301,\n    \"stddev_ts\": 0.016617,\n    \"samples_ns\": [ 22965161629, 23099316458, 23059102011 ],\n    \"samples_ts\": [ 5.57366, 5.54129, 5.55095 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:19:02Z",
+          "avg_ns": 12910479338,
+          "stddev_ns": 927424,
+          "avg_ts": 9.914427,
+          "stddev_ts": 0.000707,
+          "samples_ns": [
+            12910357953,
+            12911454446,
+            12909625616
+          ],
+          "samples_ts": [
+            9.91452,
+            9.91368,
+            9.91508
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:19:53Z",
+          "avg_ns": 23041193366,
+          "stddev_ns": 68847071,
+          "avg_ts": 5.555301,
+          "stddev_ts": 0.016617,
+          "samples_ns": [
+            22965161629,
+            23099316458,
+            23059102011
+          ],
+          "samples_ts": [
+            5.57366,
+            5.54129,
+            5.55095
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1288
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:26:36.978607+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:21:04Z\",\n    \"avg_ns\": 12906433601,\n    \"stddev_ns\": 2375919,\n    \"avg_ts\": 9.917535,\n    \"stddev_ts\": 0.001826,\n    \"samples_ns\": [ 12908470922, 12907006162, 12903823719 ],\n    \"samples_ts\": [ 9.91597, 9.91709, 9.91954 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:21:56Z\",\n    \"avg_ns\": 93466468682,\n    \"stddev_ns\": 79555233,\n    \"avg_ts\": 5.477903,\n    \"stddev_ts\": 0.004663,\n    \"samples_ns\": [ 93381592016, 93478476787, 93539337243 ],\n    \"samples_ts\": [ 5.48288, 5.4772, 5.47363 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:21:04Z",
+          "avg_ns": 12906433601,
+          "stddev_ns": 2375919,
+          "avg_ts": 9.917535,
+          "stddev_ts": 0.001826,
+          "samples_ns": [
+            12908470922,
+            12907006162,
+            12903823719
+          ],
+          "samples_ts": [
+            9.91597,
+            9.91709,
+            9.91954
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:21:56Z",
+          "avg_ns": 93466468682,
+          "stddev_ns": 79555233,
+          "avg_ts": 5.477903,
+          "stddev_ts": 0.004663,
+          "samples_ns": [
+            93381592016,
+            93478476787,
+            93539337243
+          ],
+          "samples_ts": [
+            5.48288,
+            5.4772,
+            5.47363
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1289
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:31:15.255031+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:26:38Z\",\n    \"avg_ns\": 52009434889,\n    \"stddev_ns\": 6410168,\n    \"avg_ts\": 9.844368,\n    \"stddev_ts\": 0.001213,\n    \"samples_ns\": [ 52010572590, 52002536151, 52015195927 ],\n    \"samples_ts\": [ 9.84415, 9.84567, 9.84328 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:30:06Z\",\n    \"avg_ns\": 22927065547,\n    \"stddev_ns\": 166771785,\n    \"avg_ts\": 5.583119,\n    \"stddev_ts\": 0.040782,\n    \"samples_ns\": [ 22734634469, 23016914087, 23029648085 ],\n    \"samples_ts\": [ 5.63018, 5.56113, 5.55805 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:26:38Z",
+          "avg_ns": 52009434889,
+          "stddev_ns": 6410168,
+          "avg_ts": 9.844368,
+          "stddev_ts": 0.001213,
+          "samples_ns": [
+            52010572590,
+            52002536151,
+            52015195927
+          ],
+          "samples_ts": [
+            9.84415,
+            9.84567,
+            9.84328
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:30:06Z",
+          "avg_ns": 22927065547,
+          "stddev_ns": 166771785,
+          "avg_ts": 5.583119,
+          "stddev_ts": 0.040782,
+          "samples_ns": [
+            22734634469,
+            23016914087,
+            23029648085
+          ],
+          "samples_ts": [
+            5.63018,
+            5.56113,
+            5.55805
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1290
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:39:24.831784+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:31:16Z\",\n    \"avg_ns\": 52011282095,\n    \"stddev_ns\": 3112193,\n    \"avg_ts\": 9.844018,\n    \"stddev_ts\": 0.000587,\n    \"samples_ns\": [ 52007840288, 52012137431, 52013868567 ],\n    \"samples_ts\": [ 9.84467, 9.84386, 9.84353 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:34:44Z\",\n    \"avg_ns\": 93359953117,\n    \"stddev_ns\": 201419623,\n    \"avg_ts\": 5.484167,\n    \"stddev_ts\": 0.011846,\n    \"samples_ns\": [ 93127972562, 93461502555, 93490384235 ],\n    \"samples_ts\": [ 5.49781, 5.47819, 5.4765 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:31:16Z",
+          "avg_ns": 52011282095,
+          "stddev_ns": 3112193,
+          "avg_ts": 9.844018,
+          "stddev_ts": 0.000587,
+          "samples_ns": [
+            52007840288,
+            52012137431,
+            52013868567
+          ],
+          "samples_ts": [
+            9.84467,
+            9.84386,
+            9.84353
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:34:44Z",
+          "avg_ns": 93359953117,
+          "stddev_ns": 201419623,
+          "avg_ts": 5.484167,
+          "stddev_ts": 0.011846,
+          "samples_ns": [
+            93127972562,
+            93461502555,
+            93490384235
+          ],
+          "samples_ts": [
+            5.49781,
+            5.47819,
+            5.4765
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1291
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:41:27.098676+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:39:26Z\",\n    \"avg_ns\": 12912186184,\n    \"stddev_ns\": 5597015,\n    \"avg_ts\": 9.913117,\n    \"stddev_ts\": 0.004296,\n    \"samples_ns\": [ 12907422175, 12910786027, 12918350350 ],\n    \"samples_ts\": [ 9.91677, 9.91419, 9.90839 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:40:17Z\",\n    \"avg_ns\": 23048535822,\n    \"stddev_ns\": 48236278,\n    \"avg_ts\": 5.553514,\n    \"stddev_ts\": 0.011633,\n    \"samples_ns\": [ 22994279458, 23086567106, 23064760904 ],\n    \"samples_ts\": [ 5.5666, 5.54435, 5.54959 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:39:26Z",
+          "avg_ns": 12912186184,
+          "stddev_ns": 5597015,
+          "avg_ts": 9.913117,
+          "stddev_ts": 0.004296,
+          "samples_ns": [
+            12907422175,
+            12910786027,
+            12918350350
+          ],
+          "samples_ts": [
+            9.91677,
+            9.91419,
+            9.90839
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:40:17Z",
+          "avg_ns": 23048535822,
+          "stddev_ns": 48236278,
+          "avg_ts": 5.553514,
+          "stddev_ts": 0.011633,
+          "samples_ns": [
+            22994279458,
+            23086567106,
+            23064760904
+          ],
+          "samples_ts": [
+            5.5666,
+            5.54435,
+            5.54959
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1292
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:47:00.584021+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:41:28Z\",\n    \"avg_ns\": 12917913485,\n    \"stddev_ns\": 1200238,\n    \"avg_ts\": 9.908721,\n    \"stddev_ts\": 0.000912,\n    \"samples_ns\": [ 12919259158, 12917002720, 12917478579 ],\n    \"samples_ts\": [ 9.90769, 9.90942, 9.90905 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:42:19Z\",\n    \"avg_ns\": 93455191906,\n    \"stddev_ns\": 40726575,\n    \"avg_ts\": 5.478562,\n    \"stddev_ts\": 0.002388,\n    \"samples_ns\": [ 93473817004, 93408484307, 93483274408 ],\n    \"samples_ts\": [ 5.47747, 5.4813, 5.47692 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:41:28Z",
+          "avg_ns": 12917913485,
+          "stddev_ns": 1200238,
+          "avg_ts": 9.908721,
+          "stddev_ts": 0.000912,
+          "samples_ns": [
+            12919259158,
+            12917002720,
+            12917478579
+          ],
+          "samples_ts": [
+            9.90769,
+            9.90942,
+            9.90905
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:42:19Z",
+          "avg_ns": 93455191906,
+          "stddev_ns": 40726575,
+          "avg_ts": 5.478562,
+          "stddev_ts": 0.002388,
+          "samples_ns": [
+            93473817004,
+            93408484307,
+            93483274408
+          ],
+          "samples_ts": [
+            5.47747,
+            5.4813,
+            5.47692
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1293
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:51:40.854569+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:47:01Z\",\n    \"avg_ns\": 52417618966,\n    \"stddev_ns\": 5552454,\n    \"avg_ts\": 9.767708,\n    \"stddev_ts\": 0.001033,\n    \"samples_ns\": [ 52423705353, 52412860593, 52416290954 ],\n    \"samples_ts\": [ 9.76657, 9.76859, 9.76796 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:50:31Z\",\n    \"avg_ns\": 23047143990,\n    \"stddev_ns\": 79157330,\n    \"avg_ts\": 5.553877,\n    \"stddev_ts\": 0.019080,\n    \"samples_ns\": [ 22966251462, 23124443465, 23050737044 ],\n    \"samples_ts\": [ 5.5734, 5.53527, 5.55297 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:47:01Z",
+          "avg_ns": 52417618966,
+          "stddev_ns": 5552454,
+          "avg_ts": 9.767708,
+          "stddev_ts": 0.001033,
+          "samples_ns": [
+            52423705353,
+            52412860593,
+            52416290954
+          ],
+          "samples_ts": [
+            9.76657,
+            9.76859,
+            9.76796
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:50:31Z",
+          "avg_ns": 23047143990,
+          "stddev_ns": 79157330,
+          "avg_ts": 5.553877,
+          "stddev_ts": 0.01908,
+          "samples_ns": [
+            22966251462,
+            23124443465,
+            23050737044
+          ],
+          "samples_ts": [
+            5.5734,
+            5.53527,
+            5.55297
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1294
+    },
+    {
+      "timestamp_utc": "2025-12-12T00:59:52.173752+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:51:42Z\",\n    \"avg_ns\": 52414692240,\n    \"stddev_ns\": 2998771,\n    \"avg_ts\": 9.768254,\n    \"stddev_ts\": 0.000556,\n    \"samples_ns\": [ 52413535420, 52418078520, 52412462782 ],\n    \"samples_ts\": [ 9.76847, 9.76762, 9.76867 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 4B Q2_K - Medium\",\n    \"model_size\": 1722623232,\n    \"model_n_params\": 3880263168,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T00:55:11Z\",\n    \"avg_ns\": 93398392887,\n    \"stddev_ns\": 71287126,\n    \"avg_ts\": 5.481895,\n    \"stddev_ts\": 0.004184,\n    \"samples_ns\": [ 93329690407, 93472010288, 93393477966 ],\n    \"samples_ts\": [ 5.48593, 5.47758, 5.48218 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:51:42Z",
+          "avg_ns": 52414692240,
+          "stddev_ns": 2998771,
+          "avg_ts": 9.768254,
+          "stddev_ts": 0.000556,
+          "samples_ns": [
+            52413535420,
+            52418078520,
+            52412462782
+          ],
+          "samples_ts": [
+            9.76847,
+            9.76762,
+            9.76867
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 4B Q2_K - Medium",
+          "model_size": 1722623232,
+          "model_n_params": 3880263168,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T00:55:11Z",
+          "avg_ns": 93398392887,
+          "stddev_ns": 71287126,
+          "avg_ts": 5.481895,
+          "stddev_ts": 0.004184,
+          "samples_ns": [
+            93329690407,
+            93472010288,
+            93393477966
+          ],
+          "samples_ts": [
+            5.48593,
+            5.47758,
+            5.48218
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-4B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1295
+    },
+    {
+      "timestamp_utc": "2025-12-12T01:21:46.864238+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T01:00:47Z\",\n    \"avg_ns\": 168043614257,\n    \"stddev_ns\": 30836230,\n    \"avg_ts\": 0.761707,\n    \"stddev_ts\": 0.000140,\n    \"samples_ns\": [ 168079220355, 168025645585, 168025976831 ],\n    \"samples_ts\": [ 0.761546, 0.761788, 0.761787 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T01:12:05Z\",\n    \"avg_ns\": 193343911175,\n    \"stddev_ns\": 12375716,\n    \"avg_ts\": 0.662033,\n    \"stddev_ts\": 0.000042,\n    \"samples_ns\": [ 193357965035, 193339082379, 193334686112 ],\n    \"samples_ts\": [ 0.661985, 0.662049, 0.662064 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T01:00:47Z",
+          "avg_ns": 168043614257,
+          "stddev_ns": 30836230,
+          "avg_ts": 0.761707,
+          "stddev_ts": 0.00014,
+          "samples_ns": [
+            168079220355,
+            168025645585,
+            168025976831
+          ],
+          "samples_ts": [
+            0.761546,
+            0.761788,
+            0.761787
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T01:12:05Z",
+          "avg_ns": 193343911175,
+          "stddev_ns": 12375716,
+          "avg_ts": 0.662033,
+          "stddev_ts": 4.2e-05,
+          "samples_ns": [
+            193357965035,
+            193339082379,
+            193334686112
+          ],
+          "samples_ts": [
+            0.661985,
+            0.662049,
+            0.662064
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1296
+    },
+    {
+      "timestamp_utc": "2025-12-12T02:11:59.141352+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T01:21:48Z\",\n    \"avg_ns\": 168007756394,\n    \"stddev_ns\": 2586732,\n    \"avg_ts\": 0.761870,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 168010641784, 168006841973, 168005785426 ],\n    \"samples_ts\": [ 0.761857, 0.761874, 0.761879 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T01:33:01Z\",\n    \"avg_ns\": 778787046868,\n    \"stddev_ns\": 17893857,\n    \"avg_ts\": 0.657433,\n    \"stddev_ts\": 0.000015,\n    \"samples_ns\": [ 778807274381, 778780583580, 778773282643 ],\n    \"samples_ts\": [ 0.657416, 0.657438, 0.657444 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T01:21:48Z",
+          "avg_ns": 168007756394,
+          "stddev_ns": 2586732,
+          "avg_ts": 0.76187,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            168010641784,
+            168006841973,
+            168005785426
+          ],
+          "samples_ts": [
+            0.761857,
+            0.761874,
+            0.761879
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T01:33:01Z",
+          "avg_ns": 778787046868,
+          "stddev_ns": 17893857,
+          "avg_ts": 0.657433,
+          "stddev_ts": 1.5e-05,
+          "samples_ns": [
+            778807274381,
+            778780583580,
+            778773282643
+          ],
+          "samples_ts": [
+            0.657416,
+            0.657438,
+            0.657444
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1297
+    },
+    {
+      "timestamp_utc": "2025-12-12T03:06:38.071200+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T02:12:00Z\",\n    \"avg_ns\": 673976699550,\n    \"stddev_ns\": 4667636,\n    \"avg_ts\": 0.759670,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 673981996528, 673974314447, 673973787676 ],\n    \"samples_ts\": [ 0.759664, 0.759673, 0.759673 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T02:56:56Z\",\n    \"avg_ns\": 193375093066,\n    \"stddev_ns\": 6685959,\n    \"avg_ts\": 0.661926,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 193382758001, 193370593315, 193371927883 ],\n    \"samples_ts\": [ 0.6619, 0.661941, 0.661937 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T02:12:00Z",
+          "avg_ns": 673976699550,
+          "stddev_ns": 4667636,
+          "avg_ts": 0.75967,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            673981996528,
+            673974314447,
+            673973787676
+          ],
+          "samples_ts": [
+            0.759664,
+            0.759673,
+            0.759673
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T02:56:56Z",
+          "avg_ns": 193375093066,
+          "stddev_ns": 6685959,
+          "avg_ts": 0.661926,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            193382758001,
+            193370593315,
+            193371927883
+          ],
+          "samples_ts": [
+            0.6619,
+            0.661941,
+            0.661937
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1298
+    },
+    {
+      "timestamp_utc": "2025-12-12T04:30:36.418079+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T03:06:39Z\",\n    \"avg_ns\": 673995201446,\n    \"stddev_ns\": 1803020,\n    \"avg_ts\": 0.759649,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 673994082592, 673994779031, 673996742717 ],\n    \"samples_ts\": [ 0.759651, 0.75965, 0.759648 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T03:51:35Z\",\n    \"avg_ns\": 779814365617,\n    \"stddev_ns\": 7234088,\n    \"avg_ts\": 0.656567,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 779822415852, 779812271128, 779808409871 ],\n    \"samples_ts\": [ 0.65656, 0.656568, 0.656572 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T03:06:39Z",
+          "avg_ns": 673995201446,
+          "stddev_ns": 1803020,
+          "avg_ts": 0.759649,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            673994082592,
+            673994779031,
+            673996742717
+          ],
+          "samples_ts": [
+            0.759651,
+            0.75965,
+            0.759648
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T03:51:35Z",
+          "avg_ns": 779814365617,
+          "stddev_ns": 7234088,
+          "avg_ts": 0.656567,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            779822415852,
+            779812271128,
+            779808409871
+          ],
+          "samples_ts": [
+            0.65656,
+            0.656568,
+            0.656572
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1299
+    },
+    {
+      "timestamp_utc": "2025-12-12T04:51:31.215579+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T04:30:37Z\",\n    \"avg_ns\": 168048325900,\n    \"stddev_ns\": 1345367,\n    \"avg_ts\": 0.761686,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 168047773990, 168049859456, 168047344254 ],\n    \"samples_ts\": [ 0.761688, 0.761679, 0.76169 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T04:41:49Z\",\n    \"avg_ns\": 193244626913,\n    \"stddev_ns\": 8365531,\n    \"avg_ts\": 0.662373,\n    \"stddev_ts\": 0.000029,\n    \"samples_ns\": [ 193254111958, 193238427998, 193241340785 ],\n    \"samples_ts\": [ 0.66234, 0.662394, 0.662384 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T04:30:37Z",
+          "avg_ns": 168048325900,
+          "stddev_ns": 1345367,
+          "avg_ts": 0.761686,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            168047773990,
+            168049859456,
+            168047344254
+          ],
+          "samples_ts": [
+            0.761688,
+            0.761679,
+            0.76169
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T04:41:49Z",
+          "avg_ns": 193244626913,
+          "stddev_ns": 8365531,
+          "avg_ts": 0.662373,
+          "stddev_ts": 2.9e-05,
+          "samples_ns": [
+            193254111958,
+            193238427998,
+            193241340785
+          ],
+          "samples_ts": [
+            0.66234,
+            0.662394,
+            0.662384
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1300
+    },
+    {
+      "timestamp_utc": "2025-12-12T05:41:45.222387+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T04:51:32Z\",\n    \"avg_ns\": 168054228135,\n    \"stddev_ns\": 1553772,\n    \"avg_ts\": 0.761659,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 168054375662, 168052605861, 168055702882 ],\n    \"samples_ts\": [ 0.761658, 0.761666, 0.761652 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T05:02:44Z\",\n    \"avg_ns\": 779629168357,\n    \"stddev_ns\": 7628248,\n    \"avg_ts\": 0.656722,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 779637473245, 779627557920, 779622473906 ],\n    \"samples_ts\": [ 0.656715, 0.656724, 0.656728 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T04:51:32Z",
+          "avg_ns": 168054228135,
+          "stddev_ns": 1553772,
+          "avg_ts": 0.761659,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            168054375662,
+            168052605861,
+            168055702882
+          ],
+          "samples_ts": [
+            0.761658,
+            0.761666,
+            0.761652
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T05:02:44Z",
+          "avg_ns": 779629168357,
+          "stddev_ns": 7628248,
+          "avg_ts": 0.656722,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            779637473245,
+            779627557920,
+            779622473906
+          ],
+          "samples_ts": [
+            0.656715,
+            0.656724,
+            0.656728
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1301
+    },
+    {
+      "timestamp_utc": "2025-12-12T06:36:25.521219+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T05:41:46Z\",\n    \"avg_ns\": 674412872623,\n    \"stddev_ns\": 6391049,\n    \"avg_ts\": 0.759179,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 674419621778, 674412083039, 674406913052 ],\n    \"samples_ts\": [ 0.759171, 0.75918, 0.759186 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T06:26:44Z\",\n    \"avg_ns\": 193254805574,\n    \"stddev_ns\": 11345277,\n    \"avg_ts\": 0.662338,\n    \"stddev_ts\": 0.000039,\n    \"samples_ns\": [ 193267877911, 193247671433, 193248867379 ],\n    \"samples_ts\": [ 0.662293, 0.662362, 0.662358 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T05:41:46Z",
+          "avg_ns": 674412872623,
+          "stddev_ns": 6391049,
+          "avg_ts": 0.759179,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            674419621778,
+            674412083039,
+            674406913052
+          ],
+          "samples_ts": [
+            0.759171,
+            0.75918,
+            0.759186
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T06:26:44Z",
+          "avg_ns": 193254805574,
+          "stddev_ns": 11345277,
+          "avg_ts": 0.662338,
+          "stddev_ts": 3.9e-05,
+          "samples_ns": [
+            193267877911,
+            193247671433,
+            193248867379
+          ],
+          "samples_ts": [
+            0.662293,
+            0.662362,
+            0.662358
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1302
+    },
+    {
+      "timestamp_utc": "2025-12-12T08:00:25.051432+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T06:36:26Z\",\n    \"avg_ns\": 674324385610,\n    \"stddev_ns\": 2058287,\n    \"avg_ts\": 0.759278,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 674324977305, 674322096281, 674326083244 ],\n    \"samples_ts\": [ 0.759278, 0.759281, 0.759277 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T07:21:24Z\",\n    \"avg_ns\": 779782497211,\n    \"stddev_ns\": 4796891,\n    \"avg_ts\": 0.656593,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 779787989752, 779780370752, 779779131129 ],\n    \"samples_ts\": [ 0.656589, 0.656595, 0.656596 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T06:36:26Z",
+          "avg_ns": 674324385610,
+          "stddev_ns": 2058287,
+          "avg_ts": 0.759278,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            674324977305,
+            674322096281,
+            674326083244
+          ],
+          "samples_ts": [
+            0.759278,
+            0.759281,
+            0.759277
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T07:21:24Z",
+          "avg_ns": 779782497211,
+          "stddev_ns": 4796891,
+          "avg_ts": 0.656593,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            779787989752,
+            779780370752,
+            779779131129
+          ],
+          "samples_ts": [
+            0.656589,
+            0.656595,
+            0.656596
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1303
+    },
+    {
+      "timestamp_utc": "2025-12-12T08:21:20.182065+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T08:00:26Z\",\n    \"avg_ns\": 168033600395,\n    \"stddev_ns\": 1003315,\n    \"avg_ts\": 0.761752,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 168034753915, 168033116837, 168032930433 ],\n    \"samples_ts\": [ 0.761747, 0.761755, 0.761755 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T08:11:38Z\",\n    \"avg_ns\": 193383523000,\n    \"stddev_ns\": 543875,\n    \"avg_ts\": 0.661897,\n    \"stddev_ts\": 0.000001,\n    \"samples_ns\": [ 193383892008, 193383321416, 193383355577 ],\n    \"samples_ts\": [ 0.661896, 0.661898, 0.661898 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T08:00:26Z",
+          "avg_ns": 168033600395,
+          "stddev_ns": 1003315,
+          "avg_ts": 0.761752,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            168034753915,
+            168033116837,
+            168032930433
+          ],
+          "samples_ts": [
+            0.761747,
+            0.761755,
+            0.761755
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T08:11:38Z",
+          "avg_ns": 193383523000,
+          "stddev_ns": 543875,
+          "avg_ts": 0.661897,
+          "stddev_ts": 1e-06,
+          "samples_ns": [
+            193383892008,
+            193383321416,
+            193383355577
+          ],
+          "samples_ts": [
+            0.661896,
+            0.661898,
+            0.661898
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1304
+    },
+    {
+      "timestamp_utc": "2025-12-12T09:11:31.998380+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T08:21:21Z\",\n    \"avg_ns\": 168030291025,\n    \"stddev_ns\": 776264,\n    \"avg_ts\": 0.761767,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 168030463137, 168029710694, 168030699246 ],\n    \"samples_ts\": [ 0.761767, 0.76177, 0.761766 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T08:32:33Z\",\n    \"avg_ns\": 778923627411,\n    \"stddev_ns\": 5274745,\n    \"avg_ts\": 0.657317,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 778929708492, 778920289759, 778920883982 ],\n    \"samples_ts\": [ 0.657312, 0.65732, 0.65732 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T08:21:21Z",
+          "avg_ns": 168030291025,
+          "stddev_ns": 776264,
+          "avg_ts": 0.761767,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            168030463137,
+            168029710694,
+            168030699246
+          ],
+          "samples_ts": [
+            0.761767,
+            0.76177,
+            0.761766
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T08:32:33Z",
+          "avg_ns": 778923627411,
+          "stddev_ns": 5274745,
+          "avg_ts": 0.657317,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            778929708492,
+            778920289759,
+            778920883982
+          ],
+          "samples_ts": [
+            0.657312,
+            0.65732,
+            0.65732
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1305
+    },
+    {
+      "timestamp_utc": "2025-12-12T10:06:22.390590+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T09:11:33Z\",\n    \"avg_ns\": 676792339486,\n    \"stddev_ns\": 4222218,\n    \"avg_ts\": 0.756510,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 676795530217, 676793716995, 676787771248 ],\n    \"samples_ts\": [ 0.756506, 0.756508, 0.756515 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T09:56:40Z\",\n    \"avg_ns\": 193427947422,\n    \"stddev_ns\": 16227723,\n    \"avg_ts\": 0.661745,\n    \"stddev_ts\": 0.000055,\n    \"samples_ns\": [ 193445853097, 193423737186, 193414251985 ],\n    \"samples_ts\": [ 0.661684, 0.66176, 0.661792 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T09:11:33Z",
+          "avg_ns": 676792339486,
+          "stddev_ns": 4222218,
+          "avg_ts": 0.75651,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            676795530217,
+            676793716995,
+            676787771248
+          ],
+          "samples_ts": [
+            0.756506,
+            0.756508,
+            0.756515
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T09:56:40Z",
+          "avg_ns": 193427947422,
+          "stddev_ns": 16227723,
+          "avg_ts": 0.661745,
+          "stddev_ts": 5.5e-05,
+          "samples_ns": [
+            193445853097,
+            193423737186,
+            193414251985
+          ],
+          "samples_ts": [
+            0.661684,
+            0.66176,
+            0.661792
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1306
+    },
+    {
+      "timestamp_utc": "2025-12-12T11:30:32.162622+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T10:06:23Z\",\n    \"avg_ns\": 676728337540,\n    \"stddev_ns\": 4124352,\n    \"avg_ts\": 0.756581,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 676724332001, 676728109385, 676732571234 ],\n    \"samples_ts\": [ 0.756586, 0.756582, 0.756577 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T10:51:31Z\",\n    \"avg_ns\": 779633624814,\n    \"stddev_ns\": 14605597,\n    \"avg_ts\": 0.656719,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 779650373867, 779626715134, 779623785442 ],\n    \"samples_ts\": [ 0.656705, 0.656725, 0.656727 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T10:06:23Z",
+          "avg_ns": 676728337540,
+          "stddev_ns": 4124352,
+          "avg_ts": 0.756581,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            676724332001,
+            676728109385,
+            676732571234
+          ],
+          "samples_ts": [
+            0.756586,
+            0.756582,
+            0.756577
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T10:51:31Z",
+          "avg_ns": 779633624814,
+          "stddev_ns": 14605597,
+          "avg_ts": 0.656719,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            779650373867,
+            779626715134,
+            779623785442
+          ],
+          "samples_ts": [
+            0.656705,
+            0.656725,
+            0.656727
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1307
+    },
+    {
+      "timestamp_utc": "2025-12-12T11:51:27.344433+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T11:30:33Z\",\n    \"avg_ns\": 168023353303,\n    \"stddev_ns\": 1521475,\n    \"avg_ts\": 0.761799,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 168021647487, 168024570261, 168023842161 ],\n    \"samples_ts\": [ 0.761807, 0.761793, 0.761797 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T11:41:45Z\",\n    \"avg_ns\": 193389761207,\n    \"stddev_ns\": 3576632,\n    \"avg_ts\": 0.661876,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 193393807013, 193388306199, 193387170410 ],\n    \"samples_ts\": [ 0.661862, 0.661881, 0.661885 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T11:30:33Z",
+          "avg_ns": 168023353303,
+          "stddev_ns": 1521475,
+          "avg_ts": 0.761799,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            168021647487,
+            168024570261,
+            168023842161
+          ],
+          "samples_ts": [
+            0.761807,
+            0.761793,
+            0.761797
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T11:41:45Z",
+          "avg_ns": 193389761207,
+          "stddev_ns": 3576632,
+          "avg_ts": 0.661876,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            193393807013,
+            193388306199,
+            193387170410
+          ],
+          "samples_ts": [
+            0.661862,
+            0.661881,
+            0.661885
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1308
+    },
+    {
+      "timestamp_utc": "2025-12-12T12:41:41.536933+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T11:51:28Z\",\n    \"avg_ns\": 168053705304,\n    \"stddev_ns\": 549684,\n    \"avg_ts\": 0.761661,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 168053352431, 168053679975, 168054083507 ],\n    \"samples_ts\": [ 0.761663, 0.761661, 0.76166 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T12:02:40Z\",\n    \"avg_ns\": 779687846758,\n    \"stddev_ns\": 11085028,\n    \"avg_ts\": 0.656673,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 779700311957, 779683801075, 779679427244 ],\n    \"samples_ts\": [ 0.656663, 0.656676, 0.65668 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T11:51:28Z",
+          "avg_ns": 168053705304,
+          "stddev_ns": 549684,
+          "avg_ts": 0.761661,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            168053352431,
+            168053679975,
+            168054083507
+          ],
+          "samples_ts": [
+            0.761663,
+            0.761661,
+            0.76166
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T12:02:40Z",
+          "avg_ns": 779687846758,
+          "stddev_ns": 11085028,
+          "avg_ts": 0.656673,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            779700311957,
+            779683801075,
+            779679427244
+          ],
+          "samples_ts": [
+            0.656663,
+            0.656676,
+            0.65668
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1309
+    },
+    {
+      "timestamp_utc": "2025-12-12T13:36:20.735797+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T12:41:42Z\",\n    \"avg_ns\": 674183600693,\n    \"stddev_ns\": 11016353,\n    \"avg_ts\": 0.759437,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 674171889634, 674185314968, 674193597479 ],\n    \"samples_ts\": [ 0.75945, 0.759435, 0.759426 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T13:26:39Z\",\n    \"avg_ns\": 193183105356,\n    \"stddev_ns\": 3289573,\n    \"avg_ts\": 0.662584,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 193186660187, 193182306367, 193180349516 ],\n    \"samples_ts\": [ 0.662572, 0.662587, 0.662593 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T12:41:42Z",
+          "avg_ns": 674183600693,
+          "stddev_ns": 11016353,
+          "avg_ts": 0.759437,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            674171889634,
+            674185314968,
+            674193597479
+          ],
+          "samples_ts": [
+            0.75945,
+            0.759435,
+            0.759426
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T13:26:39Z",
+          "avg_ns": 193183105356,
+          "stddev_ns": 3289573,
+          "avg_ts": 0.662584,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            193186660187,
+            193182306367,
+            193180349516
+          ],
+          "samples_ts": [
+            0.662572,
+            0.662587,
+            0.662593
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1310
+    },
+    {
+      "timestamp_utc": "2025-12-12T15:00:17.486725+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T13:36:21Z\",\n    \"avg_ns\": 673986951734,\n    \"stddev_ns\": 4346193,\n    \"avg_ts\": 0.759659,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 673982390801, 673990624982, 673987839421 ],\n    \"samples_ts\": [ 0.759664, 0.759654, 0.759658 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T14:21:17Z\",\n    \"avg_ns\": 779293294372,\n    \"stddev_ns\": 6354442,\n    \"avg_ts\": 0.657006,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 779300005039, 779292508892, 779287369185 ],\n    \"samples_ts\": [ 0.657, 0.657006, 0.657011 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T13:36:21Z",
+          "avg_ns": 673986951734,
+          "stddev_ns": 4346193,
+          "avg_ts": 0.759659,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            673982390801,
+            673990624982,
+            673987839421
+          ],
+          "samples_ts": [
+            0.759664,
+            0.759654,
+            0.759658
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T14:21:17Z",
+          "avg_ns": 779293294372,
+          "stddev_ns": 6354442,
+          "avg_ts": 0.657006,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            779300005039,
+            779292508892,
+            779287369185
+          ],
+          "samples_ts": [
+            0.657,
+            0.657006,
+            0.657011
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1311
+    },
+    {
+      "timestamp_utc": "2025-12-12T15:21:12.788544+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T15:00:18Z\",\n    \"avg_ns\": 168023891576,\n    \"stddev_ns\": 1334792,\n    \"avg_ts\": 0.761796,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 168022486833, 168024228339, 168024959557 ],\n    \"samples_ts\": [ 0.761803, 0.761795, 0.761792 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T15:11:31Z\",\n    \"avg_ns\": 193363517394,\n    \"stddev_ns\": 7302843,\n    \"avg_ts\": 0.661966,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 193371831717, 193360408487, 193358311980 ],\n    \"samples_ts\": [ 0.661937, 0.661976, 0.661983 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T15:00:18Z",
+          "avg_ns": 168023891576,
+          "stddev_ns": 1334792,
+          "avg_ts": 0.761796,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            168022486833,
+            168024228339,
+            168024959557
+          ],
+          "samples_ts": [
+            0.761803,
+            0.761795,
+            0.761792
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T15:11:31Z",
+          "avg_ns": 193363517394,
+          "stddev_ns": 7302843,
+          "avg_ts": 0.661966,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            193371831717,
+            193360408487,
+            193358311980
+          ],
+          "samples_ts": [
+            0.661937,
+            0.661976,
+            0.661983
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1312
+    },
+    {
+      "timestamp_utc": "2025-12-12T16:11:26.649816+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T15:21:14Z\",\n    \"avg_ns\": 168019548686,\n    \"stddev_ns\": 1358757,\n    \"avg_ts\": 0.761816,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 168019401302, 168020911530, 168018333227 ],\n    \"samples_ts\": [ 0.761817, 0.76181, 0.761822 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T15:32:26Z\",\n    \"avg_ns\": 779611251499,\n    \"stddev_ns\": 10459364,\n    \"avg_ts\": 0.656738,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 779621282794, 779611925222, 779600546483 ],\n    \"samples_ts\": [ 0.656729, 0.656737, 0.656747 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T15:21:14Z",
+          "avg_ns": 168019548686,
+          "stddev_ns": 1358757,
+          "avg_ts": 0.761816,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            168019401302,
+            168020911530,
+            168018333227
+          ],
+          "samples_ts": [
+            0.761817,
+            0.76181,
+            0.761822
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T15:32:26Z",
+          "avg_ns": 779611251499,
+          "stddev_ns": 10459364,
+          "avg_ts": 0.656738,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            779621282794,
+            779611925222,
+            779600546483
+          ],
+          "samples_ts": [
+            0.656729,
+            0.656737,
+            0.656747
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1313
+    },
+    {
+      "timestamp_utc": "2025-12-12T17:06:06.654035+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T16:11:27Z\",\n    \"avg_ns\": 674307838364,\n    \"stddev_ns\": 8101791,\n    \"avg_ts\": 0.759297,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 674298730583, 674310951198, 674313833313 ],\n    \"samples_ts\": [ 0.759307, 0.759294, 0.75929 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T16:56:25Z\",\n    \"avg_ns\": 193294210027,\n    \"stddev_ns\": 11521281,\n    \"avg_ts\": 0.662203,\n    \"stddev_ts\": 0.000039,\n    \"samples_ns\": [ 193286263913, 193307423451, 193288942717 ],\n    \"samples_ts\": [ 0.66223, 0.662158, 0.662221 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T16:11:27Z",
+          "avg_ns": 674307838364,
+          "stddev_ns": 8101791,
+          "avg_ts": 0.759297,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            674298730583,
+            674310951198,
+            674313833313
+          ],
+          "samples_ts": [
+            0.759307,
+            0.759294,
+            0.75929
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T16:56:25Z",
+          "avg_ns": 193294210027,
+          "stddev_ns": 11521281,
+          "avg_ts": 0.662203,
+          "stddev_ts": 3.9e-05,
+          "samples_ns": [
+            193286263913,
+            193307423451,
+            193288942717
+          ],
+          "samples_ts": [
+            0.66223,
+            0.662158,
+            0.662221
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1314
+    },
+    {
+      "timestamp_utc": "2025-12-12T18:30:07.104887+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T17:06:07Z\",\n    \"avg_ns\": 674310786681,\n    \"stddev_ns\": 5988787,\n    \"avg_ts\": 0.759294,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 674317635866, 674307450250, 674307273928 ],\n    \"samples_ts\": [ 0.759286, 0.759298, 0.759298 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T17:51:05Z\",\n    \"avg_ns\": 780073574930,\n    \"stddev_ns\": 20896128,\n    \"avg_ts\": 0.656348,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 780091587197, 780078416423, 780050721172 ],\n    \"samples_ts\": [ 0.656333, 0.656344, 0.656368 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T17:06:07Z",
+          "avg_ns": 674310786681,
+          "stddev_ns": 5988787,
+          "avg_ts": 0.759294,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            674317635866,
+            674307450250,
+            674307273928
+          ],
+          "samples_ts": [
+            0.759286,
+            0.759298,
+            0.759298
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T17:51:05Z",
+          "avg_ns": 780073574930,
+          "stddev_ns": 20896128,
+          "avg_ts": 0.656348,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            780091587197,
+            780078416423,
+            780050721172
+          ],
+          "samples_ts": [
+            0.656333,
+            0.656344,
+            0.656368
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1315
+    },
+    {
+      "timestamp_utc": "2025-12-12T18:51:02.734796+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T18:30:08Z\",\n    \"avg_ns\": 168031532161,\n    \"stddev_ns\": 1130590,\n    \"avg_ts\": 0.761762,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 168030427017, 168032686586, 168031482880 ],\n    \"samples_ts\": [ 0.761767, 0.761757, 0.761762 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T18:41:20Z\",\n    \"avg_ns\": 193536270140,\n    \"stddev_ns\": 998830,\n    \"avg_ts\": 0.661375,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 193536017109, 193535527112, 193537266200 ],\n    \"samples_ts\": [ 0.661376, 0.661377, 0.661371 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T18:30:08Z",
+          "avg_ns": 168031532161,
+          "stddev_ns": 1130590,
+          "avg_ts": 0.761762,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            168030427017,
+            168032686586,
+            168031482880
+          ],
+          "samples_ts": [
+            0.761767,
+            0.761757,
+            0.761762
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T18:41:20Z",
+          "avg_ns": 193536270140,
+          "stddev_ns": 998830,
+          "avg_ts": 0.661375,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            193536017109,
+            193535527112,
+            193537266200
+          ],
+          "samples_ts": [
+            0.661376,
+            0.661377,
+            0.661371
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1316
+    },
+    {
+      "timestamp_utc": "2025-12-12T19:41:17.214206+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T18:51:03Z\",\n    \"avg_ns\": 168039502625,\n    \"stddev_ns\": 744979,\n    \"avg_ts\": 0.761726,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 168038994633, 168040196408, 168039316835 ],\n    \"samples_ts\": [ 0.761728, 0.761723, 0.761726 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T19:02:16Z\",\n    \"avg_ns\": 779804587676,\n    \"stddev_ns\": 33435099,\n    \"avg_ts\": 0.656575,\n    \"stddev_ts\": 0.000028,\n    \"samples_ns\": [ 779771288686, 779804340160, 779838134183 ],\n    \"samples_ts\": [ 0.656603, 0.656575, 0.656547 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T18:51:03Z",
+          "avg_ns": 168039502625,
+          "stddev_ns": 744979,
+          "avg_ts": 0.761726,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            168038994633,
+            168040196408,
+            168039316835
+          ],
+          "samples_ts": [
+            0.761728,
+            0.761723,
+            0.761726
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T19:02:16Z",
+          "avg_ns": 779804587676,
+          "stddev_ns": 33435099,
+          "avg_ts": 0.656575,
+          "stddev_ts": 2.8e-05,
+          "samples_ns": [
+            779771288686,
+            779804340160,
+            779838134183
+          ],
+          "samples_ts": [
+            0.656603,
+            0.656575,
+            0.656547
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1317
+    },
+    {
+      "timestamp_utc": "2025-12-12T20:36:06.867688+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T19:41:18Z\",\n    \"avg_ns\": 676776002853,\n    \"stddev_ns\": 7338399,\n    \"avg_ts\": 0.756528,\n    \"stddev_ts\": 0.000008,\n    \"samples_ns\": [ 676768954164, 676783516222, 676775538174 ],\n    \"samples_ts\": [ 0.756536, 0.75652, 0.756529 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T20:26:25Z\",\n    \"avg_ns\": 193204537758,\n    \"stddev_ns\": 8962273,\n    \"avg_ts\": 0.662510,\n    \"stddev_ts\": 0.000031,\n    \"samples_ns\": [ 193214774854, 193198182060, 193200656361 ],\n    \"samples_ts\": [ 0.662475, 0.662532, 0.662524 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T19:41:18Z",
+          "avg_ns": 676776002853,
+          "stddev_ns": 7338399,
+          "avg_ts": 0.756528,
+          "stddev_ts": 8e-06,
+          "samples_ns": [
+            676768954164,
+            676783516222,
+            676775538174
+          ],
+          "samples_ts": [
+            0.756536,
+            0.75652,
+            0.756529
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T20:26:25Z",
+          "avg_ns": 193204537758,
+          "stddev_ns": 8962273,
+          "avg_ts": 0.66251,
+          "stddev_ts": 3.1e-05,
+          "samples_ns": [
+            193214774854,
+            193198182060,
+            193200656361
+          ],
+          "samples_ts": [
+            0.662475,
+            0.662532,
+            0.662524
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1318
+    },
+    {
+      "timestamp_utc": "2025-12-12T22:00:15.370573+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T20:36:08Z\",\n    \"avg_ns\": 676883679723,\n    \"stddev_ns\": 5679612,\n    \"avg_ts\": 0.756408,\n    \"stddev_ts\": 0.000006,\n    \"samples_ns\": [ 676878222090, 676883368508, 676889448572 ],\n    \"samples_ts\": [ 0.756414, 0.756408, 0.756401 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T21:21:15Z\",\n    \"avg_ns\": 779342256750,\n    \"stddev_ns\": 18544489,\n    \"avg_ts\": 0.656964,\n    \"stddev_ts\": 0.000016,\n    \"samples_ns\": [ 779363267472, 779335330532, 779328172246 ],\n    \"samples_ts\": [ 0.656947, 0.65697, 0.656976 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T20:36:08Z",
+          "avg_ns": 676883679723,
+          "stddev_ns": 5679612,
+          "avg_ts": 0.756408,
+          "stddev_ts": 6e-06,
+          "samples_ns": [
+            676878222090,
+            676883368508,
+            676889448572
+          ],
+          "samples_ts": [
+            0.756414,
+            0.756408,
+            0.756401
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T21:21:15Z",
+          "avg_ns": 779342256750,
+          "stddev_ns": 18544489,
+          "avg_ts": 0.656964,
+          "stddev_ts": 1.6e-05,
+          "samples_ns": [
+            779363267472,
+            779335330532,
+            779328172246
+          ],
+          "samples_ts": [
+            0.656947,
+            0.65697,
+            0.656976
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1319
+    },
+    {
+      "timestamp_utc": "2025-12-12T22:21:10.783644+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T22:00:16Z\",\n    \"avg_ns\": 168055579157,\n    \"stddev_ns\": 1146692,\n    \"avg_ts\": 0.761653,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 168055652594, 168056529733, 168054555146 ],\n    \"samples_ts\": [ 0.761652, 0.761648, 0.761657 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T22:11:28Z\",\n    \"avg_ns\": 193437092946,\n    \"stddev_ns\": 1389001,\n    \"avg_ts\": 0.661714,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 193438675868, 193436525304, 193436077666 ],\n    \"samples_ts\": [ 0.661708, 0.661716, 0.661717 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T22:00:16Z",
+          "avg_ns": 168055579157,
+          "stddev_ns": 1146692,
+          "avg_ts": 0.761653,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            168055652594,
+            168056529733,
+            168054555146
+          ],
+          "samples_ts": [
+            0.761652,
+            0.761648,
+            0.761657
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T22:11:28Z",
+          "avg_ns": 193437092946,
+          "stddev_ns": 1389001,
+          "avg_ts": 0.661714,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            193438675868,
+            193436525304,
+            193436077666
+          ],
+          "samples_ts": [
+            0.661708,
+            0.661716,
+            0.661717
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1320
+    },
+    {
+      "timestamp_utc": "2025-12-12T23:11:24.893638+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T22:21:12Z\",\n    \"avg_ns\": 168028047007,\n    \"stddev_ns\": 600287,\n    \"avg_ts\": 0.761778,\n    \"stddev_ts\": 0.000001,\n    \"samples_ns\": [ 168028099499, 168028169836, 168027871688 ],\n    \"samples_ts\": [ 0.761777, 0.761777, 0.761778 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T22:32:24Z\",\n    \"avg_ns\": 779681896156,\n    \"stddev_ns\": 24965942,\n    \"avg_ts\": 0.656678,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 779710632411, 779669080347, 779665975712 ],\n    \"samples_ts\": [ 0.656654, 0.656689, 0.656691 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T22:21:12Z",
+          "avg_ns": 168028047007,
+          "stddev_ns": 600287,
+          "avg_ts": 0.761778,
+          "stddev_ts": 1e-06,
+          "samples_ns": [
+            168028099499,
+            168028169836,
+            168027871688
+          ],
+          "samples_ts": [
+            0.761777,
+            0.761777,
+            0.761778
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-12T22:32:24Z",
+          "avg_ns": 779681896156,
+          "stddev_ns": 24965942,
+          "avg_ts": 0.656678,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            779710632411,
+            779669080347,
+            779665975712
+          ],
+          "samples_ts": [
+            0.656654,
+            0.656689,
+            0.656691
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1321
+    },
+    {
+      "timestamp_utc": "2025-12-13T00:06:03.724292+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T23:11:26Z\",\n    \"avg_ns\": 673953893562,\n    \"stddev_ns\": 3102095,\n    \"avg_ts\": 0.759696,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 673955919378, 673950600931, 673955160379 ],\n    \"samples_ts\": [ 0.759694, 0.7597, 0.759694 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-12T23:56:21Z\",\n    \"avg_ns\": 193368985761,\n    \"stddev_ns\": 5205963,\n    \"avg_ts\": 0.661947,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 193372903818, 193370974982, 193363078483 ],\n    \"samples_ts\": [ 0.661933, 0.66194, 0.661967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-12T23:11:26Z",
+          "avg_ns": 673953893562,
+          "stddev_ns": 3102095,
+          "avg_ts": 0.759696,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            673955919378,
+            673950600931,
+            673955160379
+          ],
+          "samples_ts": [
+            0.759694,
+            0.7597,
+            0.759694
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-12T23:56:21Z",
+          "avg_ns": 193368985761,
+          "stddev_ns": 5205963,
+          "avg_ts": 0.661947,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            193372903818,
+            193370974982,
+            193363078483
+          ],
+          "samples_ts": [
+            0.661933,
+            0.66194,
+            0.661967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1322
+    },
+    {
+      "timestamp_utc": "2025-12-13T01:30:01.228939+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T00:06:04Z\",\n    \"avg_ns\": 673982947258,\n    \"stddev_ns\": 2544227,\n    \"avg_ts\": 0.759663,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 673984412055, 673980009441, 673984420278 ],\n    \"samples_ts\": [ 0.759661, 0.759666, 0.759661 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T00:51:00Z\",\n    \"avg_ns\": 779535930430,\n    \"stddev_ns\": 2228063,\n    \"avg_ts\": 0.656801,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 779534968337, 779538279768, 779534543186 ],\n    \"samples_ts\": [ 0.656802, 0.656799, 0.656802 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T00:06:04Z",
+          "avg_ns": 673982947258,
+          "stddev_ns": 2544227,
+          "avg_ts": 0.759663,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            673984412055,
+            673980009441,
+            673984420278
+          ],
+          "samples_ts": [
+            0.759661,
+            0.759666,
+            0.759661
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T00:51:00Z",
+          "avg_ns": 779535930430,
+          "stddev_ns": 2228063,
+          "avg_ts": 0.656801,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            779534968337,
+            779538279768,
+            779534543186
+          ],
+          "samples_ts": [
+            0.656802,
+            0.656799,
+            0.656802
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1323
+    },
+    {
+      "timestamp_utc": "2025-12-13T01:50:55.857716+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T01:30:02Z\",\n    \"avg_ns\": 168015556952,\n    \"stddev_ns\": 1474123,\n    \"avg_ts\": 0.761834,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 168017253863, 168014592699, 168014824294 ],\n    \"samples_ts\": [ 0.761827, 0.761839, 0.761838 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T01:41:14Z\",\n    \"avg_ns\": 193227216790,\n    \"stddev_ns\": 3204510,\n    \"avg_ts\": 0.662433,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 193223611741, 193228446276, 193229592354 ],\n    \"samples_ts\": [ 0.662445, 0.662428, 0.662424 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T01:30:02Z",
+          "avg_ns": 168015556952,
+          "stddev_ns": 1474123,
+          "avg_ts": 0.761834,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            168017253863,
+            168014592699,
+            168014824294
+          ],
+          "samples_ts": [
+            0.761827,
+            0.761839,
+            0.761838
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T01:41:14Z",
+          "avg_ns": 193227216790,
+          "stddev_ns": 3204510,
+          "avg_ts": 0.662433,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            193223611741,
+            193228446276,
+            193229592354
+          ],
+          "samples_ts": [
+            0.662445,
+            0.662428,
+            0.662424
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1324
+    },
+    {
+      "timestamp_utc": "2025-12-13T02:41:09.457429+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T01:50:57Z\",\n    \"avg_ns\": 168022011609,\n    \"stddev_ns\": 1076172,\n    \"avg_ts\": 0.761805,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 168020868007, 168022487208, 168022679613 ],\n    \"samples_ts\": [ 0.76181, 0.761803, 0.761802 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T02:02:09Z\",\n    \"avg_ns\": 779533047288,\n    \"stddev_ns\": 15055328,\n    \"avg_ts\": 0.656803,\n    \"stddev_ts\": 0.000013,\n    \"samples_ns\": [ 779549556373, 779529341532, 779520243961 ],\n    \"samples_ts\": [ 0.65679, 0.656807, 0.656814 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T01:50:57Z",
+          "avg_ns": 168022011609,
+          "stddev_ns": 1076172,
+          "avg_ts": 0.761805,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            168020868007,
+            168022487208,
+            168022679613
+          ],
+          "samples_ts": [
+            0.76181,
+            0.761803,
+            0.761802
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T02:02:09Z",
+          "avg_ns": 779533047288,
+          "stddev_ns": 15055328,
+          "avg_ts": 0.656803,
+          "stddev_ts": 1.3e-05,
+          "samples_ns": [
+            779549556373,
+            779529341532,
+            779520243961
+          ],
+          "samples_ts": [
+            0.65679,
+            0.656807,
+            0.656814
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1325
+    },
+    {
+      "timestamp_utc": "2025-12-13T03:35:49.516409+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T02:41:10Z\",\n    \"avg_ns\": 674296542488,\n    \"stddev_ns\": 2147184,\n    \"avg_ts\": 0.759310,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 674297906415, 674297226756, 674294494295 ],\n    \"samples_ts\": [ 0.759308, 0.759309, 0.759312 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T03:26:07Z\",\n    \"avg_ns\": 193315878233,\n    \"stddev_ns\": 5150994,\n    \"avg_ts\": 0.662129,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 193321763102, 193312331097, 193313540501 ],\n    \"samples_ts\": [ 0.662109, 0.662141, 0.662137 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T02:41:10Z",
+          "avg_ns": 674296542488,
+          "stddev_ns": 2147184,
+          "avg_ts": 0.75931,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            674297906415,
+            674297226756,
+            674294494295
+          ],
+          "samples_ts": [
+            0.759308,
+            0.759309,
+            0.759312
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T03:26:07Z",
+          "avg_ns": 193315878233,
+          "stddev_ns": 5150994,
+          "avg_ts": 0.662129,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            193321763102,
+            193312331097,
+            193313540501
+          ],
+          "samples_ts": [
+            0.662109,
+            0.662141,
+            0.662137
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1326
+    },
+    {
+      "timestamp_utc": "2025-12-13T04:59:47.192416+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T03:35:50Z\",\n    \"avg_ns\": 674286673891,\n    \"stddev_ns\": 4488874,\n    \"avg_ts\": 0.759321,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 674286025316, 674282620940, 674291375418 ],\n    \"samples_ts\": [ 0.759322, 0.759326, 0.759316 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T04:20:47Z\",\n    \"avg_ns\": 779192846415,\n    \"stddev_ns\": 5955329,\n    \"avg_ts\": 0.657090,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 779198427046, 779193421984, 779186690216 ],\n    \"samples_ts\": [ 0.657086, 0.65709, 0.657095 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T03:35:50Z",
+          "avg_ns": 674286673891,
+          "stddev_ns": 4488874,
+          "avg_ts": 0.759321,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            674286025316,
+            674282620940,
+            674291375418
+          ],
+          "samples_ts": [
+            0.759322,
+            0.759326,
+            0.759316
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T04:20:47Z",
+          "avg_ns": 779192846415,
+          "stddev_ns": 5955329,
+          "avg_ts": 0.65709,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            779198427046,
+            779193421984,
+            779186690216
+          ],
+          "samples_ts": [
+            0.657086,
+            0.65709,
+            0.657095
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1327
+    },
+    {
+      "timestamp_utc": "2025-12-13T05:20:42.334593+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T04:59:48Z\",\n    \"avg_ns\": 168037124575,\n    \"stddev_ns\": 588014,\n    \"avg_ts\": 0.761736,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 168037774841, 168036630248, 168036968636 ],\n    \"samples_ts\": [ 0.761733, 0.761739, 0.761737 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T05:11:00Z\",\n    \"avg_ns\": 193371094742,\n    \"stddev_ns\": 7729272,\n    \"avg_ts\": 0.661940,\n    \"stddev_ts\": 0.000026,\n    \"samples_ns\": [ 193379943863, 193367675890, 193365664473 ],\n    \"samples_ts\": [ 0.661909, 0.661951, 0.661958 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T04:59:48Z",
+          "avg_ns": 168037124575,
+          "stddev_ns": 588014,
+          "avg_ts": 0.761736,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            168037774841,
+            168036630248,
+            168036968636
+          ],
+          "samples_ts": [
+            0.761733,
+            0.761739,
+            0.761737
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T05:11:00Z",
+          "avg_ns": 193371094742,
+          "stddev_ns": 7729272,
+          "avg_ts": 0.66194,
+          "stddev_ts": 2.6e-05,
+          "samples_ns": [
+            193379943863,
+            193367675890,
+            193365664473
+          ],
+          "samples_ts": [
+            0.661909,
+            0.661951,
+            0.661958
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1328
+    },
+    {
+      "timestamp_utc": "2025-12-13T06:10:56.779634+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T05:20:43Z\",\n    \"avg_ns\": 168040690359,\n    \"stddev_ns\": 1098581,\n    \"avg_ts\": 0.761720,\n    \"stddev_ts\": 0.000005,\n    \"samples_ns\": [ 168041599262, 168039588403, 168040883413 ],\n    \"samples_ts\": [ 0.761716, 0.761725, 0.761719 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T05:31:55Z\",\n    \"avg_ns\": 779790553788,\n    \"stddev_ns\": 4384080,\n    \"avg_ts\": 0.656587,\n    \"stddev_ts\": 0.000004,\n    \"samples_ns\": [ 779793107515, 779785491555, 779793062294 ],\n    \"samples_ts\": [ 0.656584, 0.656591, 0.656584 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T05:20:43Z",
+          "avg_ns": 168040690359,
+          "stddev_ns": 1098581,
+          "avg_ts": 0.76172,
+          "stddev_ts": 5e-06,
+          "samples_ns": [
+            168041599262,
+            168039588403,
+            168040883413
+          ],
+          "samples_ts": [
+            0.761716,
+            0.761725,
+            0.761719
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T05:31:55Z",
+          "avg_ns": 779790553788,
+          "stddev_ns": 4384080,
+          "avg_ts": 0.656587,
+          "stddev_ts": 4e-06,
+          "samples_ns": [
+            779793107515,
+            779785491555,
+            779793062294
+          ],
+          "samples_ts": [
+            0.656584,
+            0.656591,
+            0.656584
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1329
+    },
+    {
+      "timestamp_utc": "2025-12-13T07:05:46.225198+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T06:10:58Z\",\n    \"avg_ns\": 676716084061,\n    \"stddev_ns\": 17880072,\n    \"avg_ts\": 0.756595,\n    \"stddev_ts\": 0.000020,\n    \"samples_ns\": [ 676722856960, 676729588226, 676695806997 ],\n    \"samples_ts\": [ 0.756587, 0.75658, 0.756618 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T06:56:04Z\",\n    \"avg_ns\": 193215427157,\n    \"stddev_ns\": 6202083,\n    \"avg_ts\": 0.662473,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 193222564721, 193211605374, 193212111377 ],\n    \"samples_ts\": [ 0.662449, 0.662486, 0.662484 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T06:10:58Z",
+          "avg_ns": 676716084061,
+          "stddev_ns": 17880072,
+          "avg_ts": 0.756595,
+          "stddev_ts": 2e-05,
+          "samples_ns": [
+            676722856960,
+            676729588226,
+            676695806997
+          ],
+          "samples_ts": [
+            0.756587,
+            0.75658,
+            0.756618
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T06:56:04Z",
+          "avg_ns": 193215427157,
+          "stddev_ns": 6202083,
+          "avg_ts": 0.662473,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            193222564721,
+            193211605374,
+            193212111377
+          ],
+          "samples_ts": [
+            0.662449,
+            0.662486,
+            0.662484
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1330
+    },
+    {
+      "timestamp_utc": "2025-12-13T08:29:54.262511+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "1",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T07:05:47Z\",\n    \"avg_ns\": 676759878721,\n    \"stddev_ns\": 2583177,\n    \"avg_ts\": 0.756546,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 676758183116, 676762851729, 676758601318 ],\n    \"samples_ts\": [ 0.756548, 0.756543, 0.756547 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 1,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T07:50:54Z\",\n    \"avg_ns\": 779349086770,\n    \"stddev_ns\": 54940115,\n    \"avg_ts\": 0.656958,\n    \"stddev_ts\": 0.000046,\n    \"samples_ns\": [ 779412477126, 779315420789, 779319362396 ],\n    \"samples_ts\": [ 0.656905, 0.656987, 0.656984 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T07:05:47Z",
+          "avg_ns": 676759878721,
+          "stddev_ns": 2583177,
+          "avg_ts": 0.756546,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            676758183116,
+            676762851729,
+            676758601318
+          ],
+          "samples_ts": [
+            0.756548,
+            0.756543,
+            0.756547
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 1,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T07:50:54Z",
+          "avg_ns": 779349086770,
+          "stddev_ns": 54940115,
+          "avg_ts": 0.656958,
+          "stddev_ts": 4.6e-05,
+          "samples_ns": [
+            779412477126,
+            779315420789,
+            779319362396
+          ],
+          "samples_ts": [
+            0.656905,
+            0.656987,
+            0.656984
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 1,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1331
+    },
+    {
+      "timestamp_utc": "2025-12-13T08:40:39.636426+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T08:29:55Z\",\n    \"avg_ns\": 85131239553,\n    \"stddev_ns\": 2438518,\n    \"avg_ts\": 1.503561,\n    \"stddev_ts\": 0.000042,\n    \"samples_ns\": [ 85133482891, 85128703075, 85131532695 ],\n    \"samples_ts\": [ 1.50352, 1.50361, 1.50356 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T08:35:35Z\",\n    \"avg_ns\": 100918349893,\n    \"stddev_ns\": 68863766,\n    \"avg_ts\": 1.268352,\n    \"stddev_ts\": 0.000865,\n    \"samples_ns\": [ 100997824407, 100876361074, 100880864198 ],\n    \"samples_ts\": [ 1.26735, 1.26888, 1.26882 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T08:29:55Z",
+          "avg_ns": 85131239553,
+          "stddev_ns": 2438518,
+          "avg_ts": 1.503561,
+          "stddev_ts": 4.2e-05,
+          "samples_ns": [
+            85133482891,
+            85128703075,
+            85131532695
+          ],
+          "samples_ts": [
+            1.50352,
+            1.50361,
+            1.50356
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T08:35:35Z",
+          "avg_ns": 100918349893,
+          "stddev_ns": 68863766,
+          "avg_ts": 1.268352,
+          "stddev_ts": 0.000865,
+          "samples_ns": [
+            100997824407,
+            100876361074,
+            100880864198
+          ],
+          "samples_ts": [
+            1.26735,
+            1.26888,
+            1.26882
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1332
+    },
+    {
+      "timestamp_utc": "2025-12-13T09:06:39.801911+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T08:40:40Z\",\n    \"avg_ns\": 84164730480,\n    \"stddev_ns\": 2638895,\n    \"avg_ts\": 1.520827,\n    \"stddev_ts\": 0.000047,\n    \"samples_ns\": [ 84163624215, 84167707984, 84162859243 ],\n    \"samples_ts\": [ 1.52085, 1.52077, 1.52086 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T08:46:17Z\",\n    \"avg_ns\": 407098546397,\n    \"stddev_ns\": 23192642,\n    \"avg_ts\": 1.257681,\n    \"stddev_ts\": 0.000072,\n    \"samples_ns\": [ 407112958824, 407110887821, 407071792546 ],\n    \"samples_ts\": [ 1.25764, 1.25764, 1.25776 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T08:40:40Z",
+          "avg_ns": 84164730480,
+          "stddev_ns": 2638895,
+          "avg_ts": 1.520827,
+          "stddev_ts": 4.7e-05,
+          "samples_ns": [
+            84163624215,
+            84167707984,
+            84162859243
+          ],
+          "samples_ts": [
+            1.52085,
+            1.52077,
+            1.52086
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T08:46:17Z",
+          "avg_ns": 407098546397,
+          "stddev_ns": 23192642,
+          "avg_ts": 1.257681,
+          "stddev_ts": 7.2e-05,
+          "samples_ns": [
+            407112958824,
+            407110887821,
+            407071792546
+          ],
+          "samples_ts": [
+            1.25764,
+            1.25764,
+            1.25776
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1333
+    },
+    {
+      "timestamp_utc": "2025-12-13T09:34:15.113853+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T09:06:41Z\",\n    \"avg_ns\": 337632528196,\n    \"stddev_ns\": 3705140,\n    \"avg_ts\": 1.516442,\n    \"stddev_ts\": 0.000016,\n    \"samples_ns\": [ 337634285836, 337628372820, 337634925934 ],\n    \"samples_ts\": [ 1.51643, 1.51646, 1.51643 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T09:29:11Z\",\n    \"avg_ns\": 100866333319,\n    \"stddev_ns\": 6075681,\n    \"avg_ts\": 1.269006,\n    \"stddev_ts\": 0.000076,\n    \"samples_ns\": [ 100873295883, 100862177449, 100863526626 ],\n    \"samples_ts\": [ 1.26892, 1.26906, 1.26904 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T09:06:41Z",
+          "avg_ns": 337632528196,
+          "stddev_ns": 3705140,
+          "avg_ts": 1.516442,
+          "stddev_ts": 1.6e-05,
+          "samples_ns": [
+            337634285836,
+            337628372820,
+            337634925934
+          ],
+          "samples_ts": [
+            1.51643,
+            1.51646,
+            1.51643
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T09:29:11Z",
+          "avg_ns": 100866333319,
+          "stddev_ns": 6075681,
+          "avg_ts": 1.269006,
+          "stddev_ts": 7.6e-05,
+          "samples_ns": [
+            100873295883,
+            100862177449,
+            100863526626
+          ],
+          "samples_ts": [
+            1.26892,
+            1.26906,
+            1.26904
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1334
+    },
+    {
+      "timestamp_utc": "2025-12-13T10:17:08.666611+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T09:34:16Z\",\n    \"avg_ns\": 337640533854,\n    \"stddev_ns\": 2266029,\n    \"avg_ts\": 1.516406,\n    \"stddev_ts\": 0.000010,\n    \"samples_ns\": [ 337640381599, 337642872171, 337638347792 ],\n    \"samples_ts\": [ 1.51641, 1.5164, 1.51642 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T09:56:46Z\",\n    \"avg_ns\": 406927074163,\n    \"stddev_ns\": 10046751,\n    \"avg_ts\": 1.258211,\n    \"stddev_ts\": 0.000031,\n    \"samples_ns\": [ 406931139949, 406934429153, 406915653388 ],\n    \"samples_ts\": [ 1.2582, 1.25819, 1.25825 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T09:34:16Z",
+          "avg_ns": 337640533854,
+          "stddev_ns": 2266029,
+          "avg_ts": 1.516406,
+          "stddev_ts": 1e-05,
+          "samples_ns": [
+            337640381599,
+            337642872171,
+            337638347792
+          ],
+          "samples_ts": [
+            1.51641,
+            1.5164,
+            1.51642
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T09:56:46Z",
+          "avg_ns": 406927074163,
+          "stddev_ns": 10046751,
+          "avg_ts": 1.258211,
+          "stddev_ts": 3.1e-05,
+          "samples_ns": [
+            406931139949,
+            406934429153,
+            406915653388
+          ],
+          "samples_ts": [
+            1.2582,
+            1.25819,
+            1.25825
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1335
+    },
+    {
+      "timestamp_utc": "2025-12-13T10:27:49.937355+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T10:17:09Z\",\n    \"avg_ns\": 84159763909,\n    \"stddev_ns\": 3944109,\n    \"avg_ts\": 1.520917,\n    \"stddev_ts\": 0.000071,\n    \"samples_ns\": [ 84155325111, 84161201979, 84162764639 ],\n    \"samples_ts\": [ 1.521, 1.52089, 1.52086 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T10:22:46Z\",\n    \"avg_ns\": 100808517923,\n    \"stddev_ns\": 3224859,\n    \"avg_ts\": 1.269734,\n    \"stddev_ts\": 0.000040,\n    \"samples_ns\": [ 100812106273, 100805988281, 100807459217 ],\n    \"samples_ts\": [ 1.26969, 1.26977, 1.26975 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T10:17:09Z",
+          "avg_ns": 84159763909,
+          "stddev_ns": 3944109,
+          "avg_ts": 1.520917,
+          "stddev_ts": 7.1e-05,
+          "samples_ns": [
+            84155325111,
+            84161201979,
+            84162764639
+          ],
+          "samples_ts": [
+            1.521,
+            1.52089,
+            1.52086
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T10:22:46Z",
+          "avg_ns": 100808517923,
+          "stddev_ns": 3224859,
+          "avg_ts": 1.269734,
+          "stddev_ts": 4e-05,
+          "samples_ns": [
+            100812106273,
+            100805988281,
+            100807459217
+          ],
+          "samples_ts": [
+            1.26969,
+            1.26977,
+            1.26975
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1336
+    },
+    {
+      "timestamp_utc": "2025-12-13T10:53:49.854357+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T10:27:51Z\",\n    \"avg_ns\": 84172732077,\n    \"stddev_ns\": 2635847,\n    \"avg_ts\": 1.520682,\n    \"stddev_ts\": 0.000048,\n    \"samples_ns\": [ 84175449740, 84172560016, 84170186475 ],\n    \"samples_ts\": [ 1.52063, 1.52069, 1.52073 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T10:33:27Z\",\n    \"avg_ns\": 407013637039,\n    \"stddev_ns\": 7472474,\n    \"avg_ts\": 1.257943,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 407022217093, 407008556517, 407010137507 ],\n    \"samples_ts\": [ 1.25792, 1.25796, 1.25795 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T10:27:51Z",
+          "avg_ns": 84172732077,
+          "stddev_ns": 2635847,
+          "avg_ts": 1.520682,
+          "stddev_ts": 4.8e-05,
+          "samples_ns": [
+            84175449740,
+            84172560016,
+            84170186475
+          ],
+          "samples_ts": [
+            1.52063,
+            1.52069,
+            1.52073
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T10:33:27Z",
+          "avg_ns": 407013637039,
+          "stddev_ns": 7472474,
+          "avg_ts": 1.257943,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            407022217093,
+            407008556517,
+            407010137507
+          ],
+          "samples_ts": [
+            1.25792,
+            1.25796,
+            1.25795
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1337
+    },
+    {
+      "timestamp_utc": "2025-12-13T11:21:27.452910+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T10:53:51Z\",\n    \"avg_ns\": 338210927568,\n    \"stddev_ns\": 1763631,\n    \"avg_ts\": 1.513848,\n    \"stddev_ts\": 0.000007,\n    \"samples_ns\": [ 338212382892, 338211119417, 338209280397 ],\n    \"samples_ts\": [ 1.51384, 1.51385, 1.51386 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T11:16:24Z\",\n    \"avg_ns\": 100824108312,\n    \"stddev_ns\": 2667611,\n    \"avg_ts\": 1.269538,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 100824759090, 100826370914, 100821194933 ],\n    \"samples_ts\": [ 1.26953, 1.26951, 1.26957 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T10:53:51Z",
+          "avg_ns": 338210927568,
+          "stddev_ns": 1763631,
+          "avg_ts": 1.513848,
+          "stddev_ts": 7e-06,
+          "samples_ns": [
+            338212382892,
+            338211119417,
+            338209280397
+          ],
+          "samples_ts": [
+            1.51384,
+            1.51385,
+            1.51386
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T11:16:24Z",
+          "avg_ns": 100824108312,
+          "stddev_ns": 2667611,
+          "avg_ts": 1.269538,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            100824759090,
+            100826370914,
+            100821194933
+          ],
+          "samples_ts": [
+            1.26953,
+            1.26951,
+            1.26957
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1338
+    },
+    {
+      "timestamp_utc": "2025-12-13T12:04:24.142759+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T11:21:28Z\",\n    \"avg_ns\": 338180579564,\n    \"stddev_ns\": 874333,\n    \"avg_ts\": 1.513984,\n    \"stddev_ts\": 0.000003,\n    \"samples_ns\": [ 338180693862, 338181167768, 338179877063 ],\n    \"samples_ts\": [ 1.51398, 1.51398, 1.51399 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T11:44:01Z\",\n    \"avg_ns\": 407236685756,\n    \"stddev_ns\": 7052806,\n    \"avg_ts\": 1.257254,\n    \"stddev_ts\": 0.000022,\n    \"samples_ns\": [ 407244407845, 407230774197, 407234875228 ],\n    \"samples_ts\": [ 1.25723, 1.25727, 1.25726 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T11:21:28Z",
+          "avg_ns": 338180579564,
+          "stddev_ns": 874333,
+          "avg_ts": 1.513984,
+          "stddev_ts": 3e-06,
+          "samples_ns": [
+            338180693862,
+            338181167768,
+            338179877063
+          ],
+          "samples_ts": [
+            1.51398,
+            1.51398,
+            1.51399
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T11:44:01Z",
+          "avg_ns": 407236685756,
+          "stddev_ns": 7052806,
+          "avg_ts": 1.257254,
+          "stddev_ts": 2.2e-05,
+          "samples_ns": [
+            407244407845,
+            407230774197,
+            407234875228
+          ],
+          "samples_ts": [
+            1.25723,
+            1.25727,
+            1.25726
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1339
+    },
+    {
+      "timestamp_utc": "2025-12-13T12:15:05.600843+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T12:04:25Z\",\n    \"avg_ns\": 84160897546,\n    \"stddev_ns\": 776595,\n    \"avg_ts\": 1.520896,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 84161658753, 84160502718, 84160531169 ],\n    \"samples_ts\": [ 1.52088, 1.5209, 1.5209 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T12:10:02Z\",\n    \"avg_ns\": 100845989123,\n    \"stddev_ns\": 4571857,\n    \"avg_ts\": 1.269262,\n    \"stddev_ts\": 0.000057,\n    \"samples_ns\": [ 100850688223, 100845674036, 100841605112 ],\n    \"samples_ts\": [ 1.2692, 1.26927, 1.26932 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T12:04:25Z",
+          "avg_ns": 84160897546,
+          "stddev_ns": 776595,
+          "avg_ts": 1.520896,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            84161658753,
+            84160502718,
+            84160531169
+          ],
+          "samples_ts": [
+            1.52088,
+            1.5209,
+            1.5209
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T12:10:02Z",
+          "avg_ns": 100845989123,
+          "stddev_ns": 4571857,
+          "avg_ts": 1.269262,
+          "stddev_ts": 5.7e-05,
+          "samples_ns": [
+            100850688223,
+            100845674036,
+            100841605112
+          ],
+          "samples_ts": [
+            1.2692,
+            1.26927,
+            1.26932
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1340
+    },
+    {
+      "timestamp_utc": "2025-12-13T12:41:09.377696+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T12:15:06Z\",\n    \"avg_ns\": 85152069416,\n    \"stddev_ns\": 3178624,\n    \"avg_ts\": 1.503193,\n    \"stddev_ts\": 0.000056,\n    \"samples_ns\": [ 85148790966, 85152340369, 85155076915 ],\n    \"samples_ts\": [ 1.50325, 1.50319, 1.50314 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T12:20:47Z\",\n    \"avg_ns\": 406976576429,\n    \"stddev_ns\": 34770400,\n    \"avg_ts\": 1.258058,\n    \"stddev_ts\": 0.000107,\n    \"samples_ns\": [ 407015908309, 406963891403, 406949929575 ],\n    \"samples_ts\": [ 1.25794, 1.2581, 1.25814 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T12:15:06Z",
+          "avg_ns": 85152069416,
+          "stddev_ns": 3178624,
+          "avg_ts": 1.503193,
+          "stddev_ts": 5.6e-05,
+          "samples_ns": [
+            85148790966,
+            85152340369,
+            85155076915
+          ],
+          "samples_ts": [
+            1.50325,
+            1.50319,
+            1.50314
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T12:20:47Z",
+          "avg_ns": 406976576429,
+          "stddev_ns": 34770400,
+          "avg_ts": 1.258058,
+          "stddev_ts": 0.000107,
+          "samples_ns": [
+            407015908309,
+            406963891403,
+            406949929575
+          ],
+          "samples_ts": [
+            1.25794,
+            1.2581,
+            1.25814
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1341
+    },
+    {
+      "timestamp_utc": "2025-12-13T13:08:52.741846+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T12:41:10Z\",\n    \"avg_ns\": 339686539245,\n    \"stddev_ns\": 2493131,\n    \"avg_ts\": 1.507272,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 339689328914, 339684946095, 339685342727 ],\n    \"samples_ts\": [ 1.50726, 1.50728, 1.50728 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T13:03:49Z\",\n    \"avg_ns\": 100778345256,\n    \"stddev_ns\": 10287536,\n    \"avg_ts\": 1.270114,\n    \"stddev_ts\": 0.000130,\n    \"samples_ns\": [ 100790204149, 100771908245, 100772923375 ],\n    \"samples_ts\": [ 1.26996, 1.2702, 1.27018 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T12:41:10Z",
+          "avg_ns": 339686539245,
+          "stddev_ns": 2493131,
+          "avg_ts": 1.507272,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            339689328914,
+            339684946095,
+            339685342727
+          ],
+          "samples_ts": [
+            1.50726,
+            1.50728,
+            1.50728
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T13:03:49Z",
+          "avg_ns": 100778345256,
+          "stddev_ns": 10287536,
+          "avg_ts": 1.270114,
+          "stddev_ts": 0.00013,
+          "samples_ns": [
+            100790204149,
+            100771908245,
+            100772923375
+          ],
+          "samples_ts": [
+            1.26996,
+            1.2702,
+            1.27018
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1342
+    },
+    {
+      "timestamp_utc": "2025-12-13T13:51:55.024932+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T13:08:54Z\",\n    \"avg_ns\": 339702175957,\n    \"stddev_ns\": 901582,\n    \"avg_ts\": 1.507203,\n    \"stddev_ts\": 0.000002,\n    \"samples_ns\": [ 339701768573, 339702474384, 339702284916 ],\n    \"samples_ts\": [ 1.5072, 1.5072, 1.5072 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T13:31:33Z\",\n    \"avg_ns\": 406891215792,\n    \"stddev_ns\": 10298107,\n    \"avg_ts\": 1.258322,\n    \"stddev_ts\": 0.000032,\n    \"samples_ns\": [ 406903056007, 406884988730, 406885602641 ],\n    \"samples_ts\": [ 1.25828, 1.25834, 1.25834 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T13:08:54Z",
+          "avg_ns": 339702175957,
+          "stddev_ns": 901582,
+          "avg_ts": 1.507203,
+          "stddev_ts": 2e-06,
+          "samples_ns": [
+            339701768573,
+            339702474384,
+            339702284916
+          ],
+          "samples_ts": [
+            1.5072,
+            1.5072,
+            1.5072
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T13:31:33Z",
+          "avg_ns": 406891215792,
+          "stddev_ns": 10298107,
+          "avg_ts": 1.258322,
+          "stddev_ts": 3.2e-05,
+          "samples_ns": [
+            406903056007,
+            406884988730,
+            406885602641
+          ],
+          "samples_ts": [
+            1.25828,
+            1.25834,
+            1.25834
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1343
+    },
+    {
+      "timestamp_utc": "2025-12-13T14:02:36.415547+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T13:51:56Z\",\n    \"avg_ns\": 84168098897,\n    \"stddev_ns\": 610052,\n    \"avg_ts\": 1.520766,\n    \"stddev_ts\": 0.000010,\n    \"samples_ns\": [ 84167850910, 84168714697, 84167731085 ],\n    \"samples_ts\": [ 1.52077, 1.52076, 1.52077 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T13:57:32Z\",\n    \"avg_ns\": 100839798784,\n    \"stddev_ns\": 3168570,\n    \"avg_ts\": 1.269340,\n    \"stddev_ts\": 0.000040,\n    \"samples_ns\": [ 100843184518, 100836947642, 100839264193 ],\n    \"samples_ts\": [ 1.2693, 1.26938, 1.26935 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T13:51:56Z",
+          "avg_ns": 84168098897,
+          "stddev_ns": 610052,
+          "avg_ts": 1.520766,
+          "stddev_ts": 1e-05,
+          "samples_ns": [
+            84167850910,
+            84168714697,
+            84167731085
+          ],
+          "samples_ts": [
+            1.52077,
+            1.52076,
+            1.52077
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T13:57:32Z",
+          "avg_ns": 100839798784,
+          "stddev_ns": 3168570,
+          "avg_ts": 1.26934,
+          "stddev_ts": 4e-05,
+          "samples_ns": [
+            100843184518,
+            100836947642,
+            100839264193
+          ],
+          "samples_ts": [
+            1.2693,
+            1.26938,
+            1.26935
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1344
+    },
+    {
+      "timestamp_utc": "2025-12-13T14:28:36.468645+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T14:02:37Z\",\n    \"avg_ns\": 84158594310,\n    \"stddev_ns\": 4254989,\n    \"avg_ts\": 1.520938,\n    \"stddev_ts\": 0.000077,\n    \"samples_ns\": [ 84157230095, 84155188738, 84163364097 ],\n    \"samples_ts\": [ 1.52096, 1.521, 1.52085 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T14:08:14Z\",\n    \"avg_ns\": 407032008919,\n    \"stddev_ns\": 7869458,\n    \"avg_ts\": 1.257886,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 407040879110, 407029024379, 407026123270 ],\n    \"samples_ts\": [ 1.25786, 1.2579, 1.2579 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T14:02:37Z",
+          "avg_ns": 84158594310,
+          "stddev_ns": 4254989,
+          "avg_ts": 1.520938,
+          "stddev_ts": 7.7e-05,
+          "samples_ns": [
+            84157230095,
+            84155188738,
+            84163364097
+          ],
+          "samples_ts": [
+            1.52096,
+            1.521,
+            1.52085
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T14:08:14Z",
+          "avg_ns": 407032008919,
+          "stddev_ns": 7869458,
+          "avg_ts": 1.257886,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            407040879110,
+            407029024379,
+            407026123270
+          ],
+          "samples_ts": [
+            1.25786,
+            1.2579,
+            1.2579
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1345
+    },
+    {
+      "timestamp_utc": "2025-12-13T14:56:11.952753+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T14:28:37Z\",\n    \"avg_ns\": 337657983815,\n    \"stddev_ns\": 3412615,\n    \"avg_ts\": 1.516327,\n    \"stddev_ts\": 0.000015,\n    \"samples_ns\": [ 337660449287, 337659413227, 337654088931 ],\n    \"samples_ts\": [ 1.51632, 1.51632, 1.51634 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T14:51:08Z\",\n    \"avg_ns\": 100834981673,\n    \"stddev_ns\": 1880542,\n    \"avg_ts\": 1.269401,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 100836919170, 100834862053, 100833163796 ],\n    \"samples_ts\": [ 1.26938, 1.2694, 1.26942 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T14:28:37Z",
+          "avg_ns": 337657983815,
+          "stddev_ns": 3412615,
+          "avg_ts": 1.516327,
+          "stddev_ts": 1.5e-05,
+          "samples_ns": [
+            337660449287,
+            337659413227,
+            337654088931
+          ],
+          "samples_ts": [
+            1.51632,
+            1.51632,
+            1.51634
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T14:51:08Z",
+          "avg_ns": 100834981673,
+          "stddev_ns": 1880542,
+          "avg_ts": 1.269401,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            100836919170,
+            100834862053,
+            100833163796
+          ],
+          "samples_ts": [
+            1.26938,
+            1.2694,
+            1.26942
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1346
+    },
+    {
+      "timestamp_utc": "2025-12-13T15:39:06.096320+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T14:56:13Z\",\n    \"avg_ns\": 337654363281,\n    \"stddev_ns\": 5614581,\n    \"avg_ts\": 1.516344,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 337659633336, 337648563114, 337654893395 ],\n    \"samples_ts\": [ 1.51632, 1.51637, 1.51634 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T15:18:43Z\",\n    \"avg_ns\": 407104604468,\n    \"stddev_ns\": 9580238,\n    \"avg_ts\": 1.257662,\n    \"stddev_ts\": 0.000029,\n    \"samples_ns\": [ 407113898195, 407094840307, 407105074904 ],\n    \"samples_ts\": [ 1.25763, 1.25769, 1.25766 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T14:56:13Z",
+          "avg_ns": 337654363281,
+          "stddev_ns": 5614581,
+          "avg_ts": 1.516344,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            337659633336,
+            337648563114,
+            337654893395
+          ],
+          "samples_ts": [
+            1.51632,
+            1.51637,
+            1.51634
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T15:18:43Z",
+          "avg_ns": 407104604468,
+          "stddev_ns": 9580238,
+          "avg_ts": 1.257662,
+          "stddev_ts": 2.9e-05,
+          "samples_ns": [
+            407113898195,
+            407094840307,
+            407105074904
+          ],
+          "samples_ts": [
+            1.25763,
+            1.25769,
+            1.25766
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1347
+    },
+    {
+      "timestamp_utc": "2025-12-13T15:49:47.316219+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T15:39:07Z\",\n    \"avg_ns\": 84175416635,\n    \"stddev_ns\": 1315037,\n    \"avg_ts\": 1.520634,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 84174267034, 84176800114, 84175182758 ],\n    \"samples_ts\": [ 1.52065, 1.52061, 1.52064 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T15:44:44Z\",\n    \"avg_ns\": 100782168244,\n    \"stddev_ns\": 5796444,\n    \"avg_ts\": 1.270066,\n    \"stddev_ts\": 0.000073,\n    \"samples_ns\": [ 100788607047, 100777398247, 100780499439 ],\n    \"samples_ts\": [ 1.26998, 1.27013, 1.27009 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T15:39:07Z",
+          "avg_ns": 84175416635,
+          "stddev_ns": 1315037,
+          "avg_ts": 1.520634,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            84174267034,
+            84176800114,
+            84175182758
+          ],
+          "samples_ts": [
+            1.52065,
+            1.52061,
+            1.52064
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T15:44:44Z",
+          "avg_ns": 100782168244,
+          "stddev_ns": 5796444,
+          "avg_ts": 1.270066,
+          "stddev_ts": 7.3e-05,
+          "samples_ns": [
+            100788607047,
+            100777398247,
+            100780499439
+          ],
+          "samples_ts": [
+            1.26998,
+            1.27013,
+            1.27009
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1348
+    },
+    {
+      "timestamp_utc": "2025-12-13T16:15:46.875126+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T15:49:48Z\",\n    \"avg_ns\": 84146820708,\n    \"stddev_ns\": 684263,\n    \"avg_ts\": 1.521151,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 84147229896, 84147124537, 84146107692 ],\n    \"samples_ts\": [ 1.52114, 1.52115, 1.52116 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T15:55:25Z\",\n    \"avg_ns\": 406926932855,\n    \"stddev_ns\": 7772097,\n    \"avg_ts\": 1.258211,\n    \"stddev_ts\": 0.000024,\n    \"samples_ns\": [ 406933897890, 406918632884, 406928267793 ],\n    \"samples_ts\": [ 1.25819, 1.25824, 1.25821 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T15:49:48Z",
+          "avg_ns": 84146820708,
+          "stddev_ns": 684263,
+          "avg_ts": 1.521151,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            84147229896,
+            84147124537,
+            84146107692
+          ],
+          "samples_ts": [
+            1.52114,
+            1.52115,
+            1.52116
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T15:55:25Z",
+          "avg_ns": 406926932855,
+          "stddev_ns": 7772097,
+          "avg_ts": 1.258211,
+          "stddev_ts": 2.4e-05,
+          "samples_ns": [
+            406933897890,
+            406918632884,
+            406928267793
+          ],
+          "samples_ts": [
+            1.25819,
+            1.25824,
+            1.25821
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1349
+    },
+    {
+      "timestamp_utc": "2025-12-13T16:43:24.139111+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T16:15:48Z\",\n    \"avg_ns\": 338133480258,\n    \"stddev_ns\": 2418629,\n    \"avg_ts\": 1.514195,\n    \"stddev_ts\": 0.000010,\n    \"samples_ns\": [ 338132343155, 338131998506, 338136099115 ],\n    \"samples_ts\": [ 1.5142, 1.5142, 1.51418 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T16:38:20Z\",\n    \"avg_ns\": 100826367401,\n    \"stddev_ns\": 3386571,\n    \"avg_ts\": 1.269509,\n    \"stddev_ts\": 0.000042,\n    \"samples_ns\": [ 100830078296, 100823542850, 100825481059 ],\n    \"samples_ts\": [ 1.26946, 1.26954, 1.26952 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T16:15:48Z",
+          "avg_ns": 338133480258,
+          "stddev_ns": 2418629,
+          "avg_ts": 1.514195,
+          "stddev_ts": 1e-05,
+          "samples_ns": [
+            338132343155,
+            338131998506,
+            338136099115
+          ],
+          "samples_ts": [
+            1.5142,
+            1.5142,
+            1.51418
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T16:38:20Z",
+          "avg_ns": 100826367401,
+          "stddev_ns": 3386571,
+          "avg_ts": 1.269509,
+          "stddev_ts": 4.2e-05,
+          "samples_ns": [
+            100830078296,
+            100823542850,
+            100825481059
+          ],
+          "samples_ts": [
+            1.26946,
+            1.26954,
+            1.26952
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1350
+    },
+    {
+      "timestamp_utc": "2025-12-13T17:26:19.416722+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T16:43:25Z\",\n    \"avg_ns\": 338075989060,\n    \"stddev_ns\": 3420618,\n    \"avg_ts\": 1.514452,\n    \"stddev_ts\": 0.000015,\n    \"samples_ns\": [ 338073232242, 338074987864, 338079747075 ],\n    \"samples_ts\": [ 1.51446, 1.51446, 1.51444 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T17:05:57Z\",\n    \"avg_ns\": 406903619401,\n    \"stddev_ns\": 8967106,\n    \"avg_ts\": 1.258283,\n    \"stddev_ts\": 0.000028,\n    \"samples_ns\": [ 406913897704, 406897603054, 406899357446 ],\n    \"samples_ts\": [ 1.25825, 1.2583, 1.2583 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T16:43:25Z",
+          "avg_ns": 338075989060,
+          "stddev_ns": 3420618,
+          "avg_ts": 1.514452,
+          "stddev_ts": 1.5e-05,
+          "samples_ns": [
+            338073232242,
+            338074987864,
+            338079747075
+          ],
+          "samples_ts": [
+            1.51446,
+            1.51446,
+            1.51444
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T17:05:57Z",
+          "avg_ns": 406903619401,
+          "stddev_ns": 8967106,
+          "avg_ts": 1.258283,
+          "stddev_ts": 2.8e-05,
+          "samples_ns": [
+            406913897704,
+            406897603054,
+            406899357446
+          ],
+          "samples_ts": [
+            1.25825,
+            1.2583,
+            1.2583
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1351
+    },
+    {
+      "timestamp_utc": "2025-12-13T17:37:08.215369+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T17:26:20Z\",\n    \"avg_ns\": 85161135650,\n    \"stddev_ns\": 2789803,\n    \"avg_ts\": 1.503033,\n    \"stddev_ts\": 0.000049,\n    \"samples_ns\": [ 85158766996, 85164165185, 85160474771 ],\n    \"samples_ts\": [ 1.50307, 1.50298, 1.50304 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T17:32:01Z\",\n    \"avg_ns\": 101987582030,\n    \"stddev_ns\": 612514991,\n    \"avg_ts\": 1.255085,\n    \"stddev_ts\": 0.007564,\n    \"samples_ns\": [ 102344097336, 102338330086, 101280318670 ],\n    \"samples_ts\": [ 1.25068, 1.25075, 1.26382 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T17:26:20Z",
+          "avg_ns": 85161135650,
+          "stddev_ns": 2789803,
+          "avg_ts": 1.503033,
+          "stddev_ts": 4.9e-05,
+          "samples_ns": [
+            85158766996,
+            85164165185,
+            85160474771
+          ],
+          "samples_ts": [
+            1.50307,
+            1.50298,
+            1.50304
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T17:32:01Z",
+          "avg_ns": 101987582030,
+          "stddev_ns": 612514991,
+          "avg_ts": 1.255085,
+          "stddev_ts": 0.007564,
+          "samples_ns": [
+            102344097336,
+            102338330086,
+            101280318670
+          ],
+          "samples_ts": [
+            1.25068,
+            1.25075,
+            1.26382
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1352
+    },
+    {
+      "timestamp_utc": "2025-12-13T18:03:09.897047+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T17:37:09Z\",\n    \"avg_ns\": 84147362683,\n    \"stddev_ns\": 2418683,\n    \"avg_ts\": 1.521141,\n    \"stddev_ts\": 0.000043,\n    \"samples_ns\": [ 84147971067, 84149383172, 84144733812 ],\n    \"samples_ts\": [ 1.52113, 1.5211, 1.52119 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T17:42:46Z\",\n    \"avg_ns\": 407627343815,\n    \"stddev_ns\": 626509912,\n    \"avg_ts\": 1.256051,\n    \"stddev_ts\": 0.001929,\n    \"samples_ns\": [ 408350751114, 407260613660, 407270666673 ],\n    \"samples_ts\": [ 1.25382, 1.25718, 1.25715 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T17:37:09Z",
+          "avg_ns": 84147362683,
+          "stddev_ns": 2418683,
+          "avg_ts": 1.521141,
+          "stddev_ts": 4.3e-05,
+          "samples_ns": [
+            84147971067,
+            84149383172,
+            84144733812
+          ],
+          "samples_ts": [
+            1.52113,
+            1.5211,
+            1.52119
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T17:42:46Z",
+          "avg_ns": 407627343815,
+          "stddev_ns": 626509912,
+          "avg_ts": 1.256051,
+          "stddev_ts": 0.001929,
+          "samples_ns": [
+            408350751114,
+            407260613660,
+            407270666673
+          ],
+          "samples_ts": [
+            1.25382,
+            1.25718,
+            1.25715
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1353
+    },
+    {
+      "timestamp_utc": "2025-12-13T18:30:53.369852+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T18:03:11Z\",\n    \"avg_ns\": 339664920575,\n    \"stddev_ns\": 7934378,\n    \"avg_ts\": 1.507368,\n    \"stddev_ts\": 0.000035,\n    \"samples_ns\": [ 339671242904, 339667472188, 339656046634 ],\n    \"samples_ts\": [ 1.50734, 1.50736, 1.50741 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T18:25:49Z\",\n    \"avg_ns\": 100869005543,\n    \"stddev_ns\": 8443774,\n    \"avg_ts\": 1.268973,\n    \"stddev_ts\": 0.000106,\n    \"samples_ns\": [ 100878755516, 100864101363, 100864159750 ],\n    \"samples_ts\": [ 1.26885, 1.26903, 1.26903 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T18:03:11Z",
+          "avg_ns": 339664920575,
+          "stddev_ns": 7934378,
+          "avg_ts": 1.507368,
+          "stddev_ts": 3.5e-05,
+          "samples_ns": [
+            339671242904,
+            339667472188,
+            339656046634
+          ],
+          "samples_ts": [
+            1.50734,
+            1.50736,
+            1.50741
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T18:25:49Z",
+          "avg_ns": 100869005543,
+          "stddev_ns": 8443774,
+          "avg_ts": 1.268973,
+          "stddev_ts": 0.000106,
+          "samples_ns": [
+            100878755516,
+            100864101363,
+            100864159750
+          ],
+          "samples_ts": [
+            1.26885,
+            1.26903,
+            1.26903
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1354
+    },
+    {
+      "timestamp_utc": "2025-12-13T19:13:55.500270+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T18:30:54Z\",\n    \"avg_ns\": 339639305530,\n    \"stddev_ns\": 11330764,\n    \"avg_ts\": 1.507482,\n    \"stddev_ts\": 0.000050,\n    \"samples_ns\": [ 339631900235, 339633685207, 339652331149 ],\n    \"samples_ts\": [ 1.50751, 1.50751, 1.50742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T18:53:33Z\",\n    \"avg_ns\": 407116889685,\n    \"stddev_ns\": 39456514,\n    \"avg_ts\": 1.257624,\n    \"stddev_ts\": 0.000122,\n    \"samples_ns\": [ 407162450113, 407094155285, 407094063657 ],\n    \"samples_ts\": [ 1.25748, 1.25769, 1.25769 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T18:30:54Z",
+          "avg_ns": 339639305530,
+          "stddev_ns": 11330764,
+          "avg_ts": 1.507482,
+          "stddev_ts": 5e-05,
+          "samples_ns": [
+            339631900235,
+            339633685207,
+            339652331149
+          ],
+          "samples_ts": [
+            1.50751,
+            1.50751,
+            1.50742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T18:53:33Z",
+          "avg_ns": 407116889685,
+          "stddev_ns": 39456514,
+          "avg_ts": 1.257624,
+          "stddev_ts": 0.000122,
+          "samples_ns": [
+            407162450113,
+            407094155285,
+            407094063657
+          ],
+          "samples_ts": [
+            1.25748,
+            1.25769,
+            1.25769
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1355
+    },
+    {
+      "timestamp_utc": "2025-12-13T19:24:36.970443+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T19:13:56Z\",\n    \"avg_ns\": 84153620104,\n    \"stddev_ns\": 3967658,\n    \"avg_ts\": 1.521028,\n    \"stddev_ts\": 0.000072,\n    \"samples_ns\": [ 84150025842, 84152956894, 84157877576 ],\n    \"samples_ts\": [ 1.52109, 1.52104, 1.52095 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T19:19:33Z\",\n    \"avg_ns\": 100896800734,\n    \"stddev_ns\": 3819242,\n    \"avg_ts\": 1.268623,\n    \"stddev_ts\": 0.000048,\n    \"samples_ns\": [ 100901185755, 100894862023, 100894354425 ],\n    \"samples_ts\": [ 1.26857, 1.26865, 1.26865 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T19:13:56Z",
+          "avg_ns": 84153620104,
+          "stddev_ns": 3967658,
+          "avg_ts": 1.521028,
+          "stddev_ts": 7.2e-05,
+          "samples_ns": [
+            84150025842,
+            84152956894,
+            84157877576
+          ],
+          "samples_ts": [
+            1.52109,
+            1.52104,
+            1.52095
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T19:19:33Z",
+          "avg_ns": 100896800734,
+          "stddev_ns": 3819242,
+          "avg_ts": 1.268623,
+          "stddev_ts": 4.8e-05,
+          "samples_ns": [
+            100901185755,
+            100894862023,
+            100894354425
+          ],
+          "samples_ts": [
+            1.26857,
+            1.26865,
+            1.26865
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1356
+    },
+    {
+      "timestamp_utc": "2025-12-13T19:50:37.235423+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T19:24:38Z\",\n    \"avg_ns\": 84149865652,\n    \"stddev_ns\": 1975101,\n    \"avg_ts\": 1.521096,\n    \"stddev_ts\": 0.000035,\n    \"samples_ns\": [ 84148651823, 84148825800, 84152119334 ],\n    \"samples_ts\": [ 1.52112, 1.52111, 1.52105 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T19:30:14Z\",\n    \"avg_ns\": 407156990071,\n    \"stddev_ns\": 8019103,\n    \"avg_ts\": 1.257500,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 407166206822, 407153152831, 407151610560 ],\n    \"samples_ts\": [ 1.25747, 1.25751, 1.25752 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T19:24:38Z",
+          "avg_ns": 84149865652,
+          "stddev_ns": 1975101,
+          "avg_ts": 1.521096,
+          "stddev_ts": 3.5e-05,
+          "samples_ns": [
+            84148651823,
+            84148825800,
+            84152119334
+          ],
+          "samples_ts": [
+            1.52112,
+            1.52111,
+            1.52105
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T19:30:14Z",
+          "avg_ns": 407156990071,
+          "stddev_ns": 8019103,
+          "avg_ts": 1.2575,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            407166206822,
+            407153152831,
+            407151610560
+          ],
+          "samples_ts": [
+            1.25747,
+            1.25751,
+            1.25752
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1357
+    },
+    {
+      "timestamp_utc": "2025-12-13T20:18:12.157140+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T19:50:38Z\",\n    \"avg_ns\": 337592135984,\n    \"stddev_ns\": 4007269,\n    \"avg_ts\": 1.516623,\n    \"stddev_ts\": 0.000018,\n    \"samples_ns\": [ 337596690958, 337590258216, 337589458779 ],\n    \"samples_ts\": [ 1.5166, 1.51663, 1.51664 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T20:13:08Z\",\n    \"avg_ns\": 100793047153,\n    \"stddev_ns\": 3951802,\n    \"avg_ts\": 1.269929,\n    \"stddev_ts\": 0.000050,\n    \"samples_ns\": [ 100797579520, 100790450532, 100791111408 ],\n    \"samples_ts\": [ 1.26987, 1.26996, 1.26995 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T19:50:38Z",
+          "avg_ns": 337592135984,
+          "stddev_ns": 4007269,
+          "avg_ts": 1.516623,
+          "stddev_ts": 1.8e-05,
+          "samples_ns": [
+            337596690958,
+            337590258216,
+            337589458779
+          ],
+          "samples_ts": [
+            1.5166,
+            1.51663,
+            1.51664
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T20:13:08Z",
+          "avg_ns": 100793047153,
+          "stddev_ns": 3951802,
+          "avg_ts": 1.269929,
+          "stddev_ts": 5e-05,
+          "samples_ns": [
+            100797579520,
+            100790450532,
+            100791111408
+          ],
+          "samples_ts": [
+            1.26987,
+            1.26996,
+            1.26995
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1358
+    },
+    {
+      "timestamp_utc": "2025-12-13T21:01:06.394341+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T20:18:13Z\",\n    \"avg_ns\": 337624166061,\n    \"stddev_ns\": 2148426,\n    \"avg_ts\": 1.516479,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 337624088717, 337622137459, 337626272008 ],\n    \"samples_ts\": [ 1.51648, 1.51649, 1.51647 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T20:40:44Z\",\n    \"avg_ns\": 407158331124,\n    \"stddev_ns\": 3059459,\n    \"avg_ts\": 1.257496,\n    \"stddev_ts\": 0.000009,\n    \"samples_ns\": [ 407160023000, 407154799398, 407160170974 ],\n    \"samples_ts\": [ 1.25749, 1.25751, 1.25749 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T20:18:13Z",
+          "avg_ns": 337624166061,
+          "stddev_ns": 2148426,
+          "avg_ts": 1.516479,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            337624088717,
+            337622137459,
+            337626272008
+          ],
+          "samples_ts": [
+            1.51648,
+            1.51649,
+            1.51647
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T20:40:44Z",
+          "avg_ns": 407158331124,
+          "stddev_ns": 3059459,
+          "avg_ts": 1.257496,
+          "stddev_ts": 9e-06,
+          "samples_ns": [
+            407160023000,
+            407154799398,
+            407160170974
+          ],
+          "samples_ts": [
+            1.25749,
+            1.25751,
+            1.25749
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1359
+    },
+    {
+      "timestamp_utc": "2025-12-13T21:11:47.708554+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T21:01:07Z\",\n    \"avg_ns\": 84173036578,\n    \"stddev_ns\": 1261284,\n    \"avg_ts\": 1.520677,\n    \"stddev_ts\": 0.000022,\n    \"samples_ns\": [ 84171623148, 84173834542, 84173652045 ],\n    \"samples_ts\": [ 1.5207, 1.52066, 1.52067 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T21:06:44Z\",\n    \"avg_ns\": 100816072203,\n    \"stddev_ns\": 3598302,\n    \"avg_ts\": 1.269639,\n    \"stddev_ts\": 0.000045,\n    \"samples_ns\": [ 100820223984, 100814137096, 100813855529 ],\n    \"samples_ts\": [ 1.26959, 1.26966, 1.26967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T21:01:07Z",
+          "avg_ns": 84173036578,
+          "stddev_ns": 1261284,
+          "avg_ts": 1.520677,
+          "stddev_ts": 2.2e-05,
+          "samples_ns": [
+            84171623148,
+            84173834542,
+            84173652045
+          ],
+          "samples_ts": [
+            1.5207,
+            1.52066,
+            1.52067
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T21:06:44Z",
+          "avg_ns": 100816072203,
+          "stddev_ns": 3598302,
+          "avg_ts": 1.269639,
+          "stddev_ts": 4.5e-05,
+          "samples_ns": [
+            100820223984,
+            100814137096,
+            100813855529
+          ],
+          "samples_ts": [
+            1.26959,
+            1.26966,
+            1.26967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1360
+    },
+    {
+      "timestamp_utc": "2025-12-13T21:37:47.068231+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T21:11:48Z\",\n    \"avg_ns\": 84163517531,\n    \"stddev_ns\": 845016,\n    \"avg_ts\": 1.520849,\n    \"stddev_ts\": 0.000013,\n    \"samples_ns\": [ 84162692545, 84164117836, 84163742214 ],\n    \"samples_ts\": [ 1.52086, 1.52084, 1.52084 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T21:17:25Z\",\n    \"avg_ns\": 406838663369,\n    \"stddev_ns\": 21512168,\n    \"avg_ts\": 1.258484,\n    \"stddev_ts\": 0.000067,\n    \"samples_ns\": [ 406861883983, 406834693462, 406819412662 ],\n    \"samples_ts\": [ 1.25841, 1.2585, 1.25854 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T21:11:48Z",
+          "avg_ns": 84163517531,
+          "stddev_ns": 845016,
+          "avg_ts": 1.520849,
+          "stddev_ts": 1.3e-05,
+          "samples_ns": [
+            84162692545,
+            84164117836,
+            84163742214
+          ],
+          "samples_ts": [
+            1.52086,
+            1.52084,
+            1.52084
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T21:17:25Z",
+          "avg_ns": 406838663369,
+          "stddev_ns": 21512168,
+          "avg_ts": 1.258484,
+          "stddev_ts": 6.7e-05,
+          "samples_ns": [
+            406861883983,
+            406834693462,
+            406819412662
+          ],
+          "samples_ts": [
+            1.25841,
+            1.2585,
+            1.25854
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1361
+    },
+    {
+      "timestamp_utc": "2025-12-13T22:05:24.237204+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T21:37:48Z\",\n    \"avg_ns\": 338117584492,\n    \"stddev_ns\": 8943417,\n    \"avg_ts\": 1.514266,\n    \"stddev_ts\": 0.000040,\n    \"samples_ns\": [ 338110442289, 338114695994, 338127615193 ],\n    \"samples_ts\": [ 1.5143, 1.51428, 1.51422 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:00:20Z\",\n    \"avg_ns\": 100812724850,\n    \"stddev_ns\": 5935648,\n    \"avg_ts\": 1.269681,\n    \"stddev_ts\": 0.000075,\n    \"samples_ns\": [ 100819145825, 100811566051, 100807462675 ],\n    \"samples_ts\": [ 1.2696, 1.2697, 1.26975 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T21:37:48Z",
+          "avg_ns": 338117584492,
+          "stddev_ns": 8943417,
+          "avg_ts": 1.514266,
+          "stddev_ts": 4e-05,
+          "samples_ns": [
+            338110442289,
+            338114695994,
+            338127615193
+          ],
+          "samples_ts": [
+            1.5143,
+            1.51428,
+            1.51422
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:00:20Z",
+          "avg_ns": 100812724850,
+          "stddev_ns": 5935648,
+          "avg_ts": 1.269681,
+          "stddev_ts": 7.5e-05,
+          "samples_ns": [
+            100819145825,
+            100811566051,
+            100807462675
+          ],
+          "samples_ts": [
+            1.2696,
+            1.2697,
+            1.26975
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1362
+    },
+    {
+      "timestamp_utc": "2025-12-13T22:48:19.701535+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:05:25Z\",\n    \"avg_ns\": 338147017560,\n    \"stddev_ns\": 5939193,\n    \"avg_ts\": 1.514134,\n    \"stddev_ts\": 0.000026,\n    \"samples_ns\": [ 338145067740, 338153656791, 338142328150 ],\n    \"samples_ts\": [ 1.51414, 1.5141, 1.51416 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:27:58Z\",\n    \"avg_ns\": 406886327425,\n    \"stddev_ns\": 6298726,\n    \"avg_ts\": 1.258337,\n    \"stddev_ts\": 0.000019,\n    \"samples_ns\": [ 406893554172, 406883026578, 406882401526 ],\n    \"samples_ts\": [ 1.25831, 1.25835, 1.25835 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:05:25Z",
+          "avg_ns": 338147017560,
+          "stddev_ns": 5939193,
+          "avg_ts": 1.514134,
+          "stddev_ts": 2.6e-05,
+          "samples_ns": [
+            338145067740,
+            338153656791,
+            338142328150
+          ],
+          "samples_ts": [
+            1.51414,
+            1.5141,
+            1.51416
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:27:58Z",
+          "avg_ns": 406886327425,
+          "stddev_ns": 6298726,
+          "avg_ts": 1.258337,
+          "stddev_ts": 1.9e-05,
+          "samples_ns": [
+            406893554172,
+            406883026578,
+            406882401526
+          ],
+          "samples_ts": [
+            1.25831,
+            1.25835,
+            1.25835
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1363
+    },
+    {
+      "timestamp_utc": "2025-12-13T22:59:01.135485+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:48:20Z\",\n    \"avg_ns\": 84159117431,\n    \"stddev_ns\": 1175921,\n    \"avg_ts\": 1.520928,\n    \"stddev_ts\": 0.000021,\n    \"samples_ns\": [ 84159774828, 84157801563, 84159775903 ],\n    \"samples_ts\": [ 1.52092, 1.52095, 1.52092 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:53:57Z\",\n    \"avg_ns\": 100863536385,\n    \"stddev_ns\": 837684,\n    \"avg_ts\": 1.269041,\n    \"stddev_ts\": 0.000011,\n    \"samples_ns\": [ 100864263654, 100862620459, 100863725042 ],\n    \"samples_ts\": [ 1.26903, 1.26905, 1.26904 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:48:20Z",
+          "avg_ns": 84159117431,
+          "stddev_ns": 1175921,
+          "avg_ts": 1.520928,
+          "stddev_ts": 2.1e-05,
+          "samples_ns": [
+            84159774828,
+            84157801563,
+            84159775903
+          ],
+          "samples_ts": [
+            1.52092,
+            1.52095,
+            1.52092
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:53:57Z",
+          "avg_ns": 100863536385,
+          "stddev_ns": 837684,
+          "avg_ts": 1.269041,
+          "stddev_ts": 1.1e-05,
+          "samples_ns": [
+            100864263654,
+            100862620459,
+            100863725042
+          ],
+          "samples_ts": [
+            1.26903,
+            1.26905,
+            1.26904
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1364
+    },
+    {
+      "timestamp_utc": "2025-12-13T23:25:01.680222+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T22:59:02Z\",\n    \"avg_ns\": 84169638320,\n    \"stddev_ns\": 1806294,\n    \"avg_ts\": 1.520738,\n    \"stddev_ts\": 0.000032,\n    \"samples_ns\": [ 84171653962, 84168267977, 84168993022 ],\n    \"samples_ts\": [ 1.5207, 1.52076, 1.52075 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T23:04:39Z\",\n    \"avg_ns\": 407186742611,\n    \"stddev_ns\": 3784594,\n    \"avg_ts\": 1.257408,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 407190824894, 407183511011, 407185891929 ],\n    \"samples_ts\": [ 1.2574, 1.25742, 1.25741 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T22:59:02Z",
+          "avg_ns": 84169638320,
+          "stddev_ns": 1806294,
+          "avg_ts": 1.520738,
+          "stddev_ts": 3.2e-05,
+          "samples_ns": [
+            84171653962,
+            84168267977,
+            84168993022
+          ],
+          "samples_ts": [
+            1.5207,
+            1.52076,
+            1.52075
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-13T23:04:39Z",
+          "avg_ns": 407186742611,
+          "stddev_ns": 3784594,
+          "avg_ts": 1.257408,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            407190824894,
+            407183511011,
+            407185891929
+          ],
+          "samples_ts": [
+            1.2574,
+            1.25742,
+            1.25741
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1365
+    },
+    {
+      "timestamp_utc": "2025-12-13T23:52:44.773391+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T23:25:02Z\",\n    \"avg_ns\": 339628921678,\n    \"stddev_ns\": 5170692,\n    \"avg_ts\": 1.507528,\n    \"stddev_ts\": 0.000023,\n    \"samples_ns\": [ 339633614488, 339629663956, 339623486592 ],\n    \"samples_ts\": [ 1.50751, 1.50752, 1.50755 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T23:47:41Z\",\n    \"avg_ns\": 100798851932,\n    \"stddev_ns\": 4001745,\n    \"avg_ts\": 1.269856,\n    \"stddev_ts\": 0.000050,\n    \"samples_ns\": [ 100803202609, 100797986891, 100795366297 ],\n    \"samples_ts\": [ 1.2698, 1.26987, 1.2699 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T23:25:02Z",
+          "avg_ns": 339628921678,
+          "stddev_ns": 5170692,
+          "avg_ts": 1.507528,
+          "stddev_ts": 2.3e-05,
+          "samples_ns": [
+            339633614488,
+            339629663956,
+            339623486592
+          ],
+          "samples_ts": [
+            1.50751,
+            1.50752,
+            1.50755
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-13T23:47:41Z",
+          "avg_ns": 100798851932,
+          "stddev_ns": 4001745,
+          "avg_ts": 1.269856,
+          "stddev_ts": 5e-05,
+          "samples_ns": [
+            100803202609,
+            100797986891,
+            100795366297
+          ],
+          "samples_ts": [
+            1.2698,
+            1.26987,
+            1.2699
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1366
+    },
+    {
+      "timestamp_utc": "2025-12-14T00:35:47.012879+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "2",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-13T23:52:46Z\",\n    \"avg_ns\": 339688941598,\n    \"stddev_ns\": 6749890,\n    \"avg_ts\": 1.507261,\n    \"stddev_ts\": 0.000030,\n    \"samples_ns\": [ 339696651484, 339685634280, 339684539032 ],\n    \"samples_ts\": [ 1.50723, 1.50728, 1.50728 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 2,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T00:15:24Z\",\n    \"avg_ns\": 407089803655,\n    \"stddev_ns\": 7097566,\n    \"avg_ts\": 1.257708,\n    \"stddev_ts\": 0.000022,\n    \"samples_ns\": [ 407097772754, 407087208846, 407084429367 ],\n    \"samples_ts\": [ 1.25768, 1.25772, 1.25772 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-13T23:52:46Z",
+          "avg_ns": 339688941598,
+          "stddev_ns": 6749890,
+          "avg_ts": 1.507261,
+          "stddev_ts": 3e-05,
+          "samples_ns": [
+            339696651484,
+            339685634280,
+            339684539032
+          ],
+          "samples_ts": [
+            1.50723,
+            1.50728,
+            1.50728
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 2,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T00:15:24Z",
+          "avg_ns": 407089803655,
+          "stddev_ns": 7097566,
+          "avg_ts": 1.257708,
+          "stddev_ts": 2.2e-05,
+          "samples_ns": [
+            407097772754,
+            407087208846,
+            407084429367
+          ],
+          "samples_ts": [
+            1.25768,
+            1.25772,
+            1.25772
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 2,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1367
+    },
+    {
+      "timestamp_utc": "2025-12-14T00:43:15.566363+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T00:35:48Z\",\n    \"avg_ns\": 56784706632,\n    \"stddev_ns\": 3591710,\n    \"avg_ts\": 2.254128,\n    \"stddev_ts\": 0.000142,\n    \"samples_ns\": [ 56788037706, 56780914699, 56785167492 ],\n    \"samples_ts\": [ 2.254, 2.25428, 2.25411 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T00:39:35Z\",\n    \"avg_ns\": 73165478202,\n    \"stddev_ns\": 990177448,\n    \"avg_ts\": 1.749671,\n    \"stddev_ts\": 0.023495,\n    \"samples_ns\": [ 72597957294, 72589650778, 74308826535 ],\n    \"samples_ts\": [ 1.76314, 1.76334, 1.72254 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T00:35:48Z",
+          "avg_ns": 56784706632,
+          "stddev_ns": 3591710,
+          "avg_ts": 2.254128,
+          "stddev_ts": 0.000142,
+          "samples_ns": [
+            56788037706,
+            56780914699,
+            56785167492
+          ],
+          "samples_ts": [
+            2.254,
+            2.25428,
+            2.25411
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T00:39:35Z",
+          "avg_ns": 73165478202,
+          "stddev_ns": 990177448,
+          "avg_ts": 1.749671,
+          "stddev_ts": 0.023495,
+          "samples_ns": [
+            72597957294,
+            72589650778,
+            74308826535
+          ],
+          "samples_ts": [
+            1.76314,
+            1.76334,
+            1.72254
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1368
+    },
+    {
+      "timestamp_utc": "2025-12-14T01:02:12.223434+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T00:43:16Z\",\n    \"avg_ns\": 56796051530,\n    \"stddev_ns\": 2495939,\n    \"avg_ts\": 2.253678,\n    \"stddev_ts\": 0.000098,\n    \"samples_ns\": [ 56795697795, 56798682431, 56793774366 ],\n    \"samples_ts\": [ 2.25369, 2.25357, 2.25377 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T00:47:04Z\",\n    \"avg_ns\": 302493684992,\n    \"stddev_ns\": 1478817767,\n    \"avg_ts\": 1.692739,\n    \"stddev_ts\": 0.019025,\n    \"samples_ns\": [ 298593213109, 304447531966, 304440309902 ],\n    \"samples_ts\": [ 1.71471, 1.68173, 1.68177 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T00:43:16Z",
+          "avg_ns": 56796051530,
+          "stddev_ns": 2495939,
+          "avg_ts": 2.253678,
+          "stddev_ts": 9.8e-05,
+          "samples_ns": [
+            56795697795,
+            56798682431,
+            56793774366
+          ],
+          "samples_ts": [
+            2.25369,
+            2.25357,
+            2.25377
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T00:47:04Z",
+          "avg_ns": 302493684992,
+          "stddev_ns": 1478817767,
+          "avg_ts": 1.692739,
+          "stddev_ts": 0.019025,
+          "samples_ns": [
+            298593213109,
+            304447531966,
+            304440309902
+          ],
+          "samples_ts": [
+            1.71471,
+            1.68173,
+            1.68177
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1369
+    },
+    {
+      "timestamp_utc": "2025-12-14T01:21:03.626928+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:02:13Z\",\n    \"avg_ns\": 227803263278,\n    \"stddev_ns\": 4832910,\n    \"avg_ts\": 2.247553,\n    \"stddev_ts\": 0.000048,\n    \"samples_ns\": [ 227808483146, 227798944029, 227802362659 ],\n    \"samples_ts\": [ 2.2475, 2.2476, 2.24756 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:17:24Z\",\n    \"avg_ns\": 72740255244,\n    \"stddev_ns\": 69555655,\n    \"avg_ts\": 1.759687,\n    \"stddev_ts\": 0.001682,\n    \"samples_ns\": [ 72703306995, 72696971482, 72820487256 ],\n    \"samples_ts\": [ 1.76058, 1.76073, 1.75775 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:02:13Z",
+          "avg_ns": 227803263278,
+          "stddev_ns": 4832910,
+          "avg_ts": 2.247553,
+          "stddev_ts": 4.8e-05,
+          "samples_ns": [
+            227808483146,
+            227798944029,
+            227802362659
+          ],
+          "samples_ts": [
+            2.2475,
+            2.2476,
+            2.24756
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:17:24Z",
+          "avg_ns": 72740255244,
+          "stddev_ns": 69555655,
+          "avg_ts": 1.759687,
+          "stddev_ts": 0.001682,
+          "samples_ns": [
+            72703306995,
+            72696971482,
+            72820487256
+          ],
+          "samples_ts": [
+            1.76058,
+            1.76073,
+            1.75775
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1370
+    },
+    {
+      "timestamp_utc": "2025-12-14T01:51:22.698712+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:21:04Z\",\n    \"avg_ns\": 227824385282,\n    \"stddev_ns\": 12225024,\n    \"avg_ts\": 2.247345,\n    \"stddev_ts\": 0.000120,\n    \"samples_ns\": [ 227838471734, 227817976327, 227816707786 ],\n    \"samples_ts\": [ 2.24721, 2.24741, 2.24742 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:36:16Z\",\n    \"avg_ns\": 301921736551,\n    \"stddev_ns\": 3798305279,\n    \"avg_ts\": 1.695984,\n    \"stddev_ts\": 0.021492,\n    \"samples_ns\": [ 297535878715, 304097022466, 304132308472 ],\n    \"samples_ts\": [ 1.7208, 1.68367, 1.68348 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:21:04Z",
+          "avg_ns": 227824385282,
+          "stddev_ns": 12225024,
+          "avg_ts": 2.247345,
+          "stddev_ts": 0.00012,
+          "samples_ns": [
+            227838471734,
+            227817976327,
+            227816707786
+          ],
+          "samples_ts": [
+            2.24721,
+            2.24741,
+            2.24742
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:36:16Z",
+          "avg_ns": 301921736551,
+          "stddev_ns": 3798305279,
+          "avg_ts": 1.695984,
+          "stddev_ts": 0.021492,
+          "samples_ns": [
+            297535878715,
+            304097022466,
+            304132308472
+          ],
+          "samples_ts": [
+            1.7208,
+            1.68367,
+            1.68348
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1371
+    },
+    {
+      "timestamp_utc": "2025-12-14T01:58:52.484296+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:51:23Z\",\n    \"avg_ns\": 56785905745,\n    \"stddev_ns\": 2433111,\n    \"avg_ts\": 2.254080,\n    \"stddev_ts\": 0.000096,\n    \"samples_ns\": [ 56783346796, 56788160966, 56786209474 ],\n    \"samples_ts\": [ 2.25418, 2.25399, 2.25407 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:55:11Z\",\n    \"avg_ns\": 73563558704,\n    \"stddev_ns\": 1366705816,\n    \"avg_ts\": 1.740388,\n    \"stddev_ts\": 0.032014,\n    \"samples_ns\": [ 72615143112, 72945399557, 75130133445 ],\n    \"samples_ts\": [ 1.76272, 1.75474, 1.70371 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:51:23Z",
+          "avg_ns": 56785905745,
+          "stddev_ns": 2433111,
+          "avg_ts": 2.25408,
+          "stddev_ts": 9.6e-05,
+          "samples_ns": [
+            56783346796,
+            56788160966,
+            56786209474
+          ],
+          "samples_ts": [
+            2.25418,
+            2.25399,
+            2.25407
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:55:11Z",
+          "avg_ns": 73563558704,
+          "stddev_ns": 1366705816,
+          "avg_ts": 1.740388,
+          "stddev_ts": 0.032014,
+          "samples_ns": [
+            72615143112,
+            72945399557,
+            75130133445
+          ],
+          "samples_ts": [
+            1.76272,
+            1.75474,
+            1.70371
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1372
+    },
+    {
+      "timestamp_utc": "2025-12-14T02:17:49.469957+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T01:58:53Z\",\n    \"avg_ns\": 56782353016,\n    \"stddev_ns\": 2349100,\n    \"avg_ts\": 2.254221,\n    \"stddev_ts\": 0.000092,\n    \"samples_ns\": [ 56784529566, 56782625508, 56779903976 ],\n    \"samples_ts\": [ 2.25414, 2.25421, 2.25432 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T02:02:40Z\",\n    \"avg_ns\": 302614142096,\n    \"stddev_ns\": 4236012511,\n    \"avg_ts\": 1.692032,\n    \"stddev_ts\": 0.016605,\n    \"samples_ns\": [ 299204271436, 304323104494, 304315050359 ],\n    \"samples_ts\": [ 1.71121, 1.68242, 1.68247 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T01:58:53Z",
+          "avg_ns": 56782353016,
+          "stddev_ns": 2349100,
+          "avg_ts": 2.254221,
+          "stddev_ts": 9.2e-05,
+          "samples_ns": [
+            56784529566,
+            56782625508,
+            56779903976
+          ],
+          "samples_ts": [
+            2.25414,
+            2.25421,
+            2.25432
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T02:02:40Z",
+          "avg_ns": 302614142096,
+          "stddev_ns": 4236012511,
+          "avg_ts": 1.692032,
+          "stddev_ts": 0.016605,
+          "samples_ns": [
+            299204271436,
+            304323104494,
+            304315050359
+          ],
+          "samples_ts": [
+            1.71121,
+            1.68242,
+            1.68247
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1373
+    },
+    {
+      "timestamp_utc": "2025-12-14T02:36:44.077441+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T02:17:50Z\",\n    \"avg_ns\": 228432210971,\n    \"stddev_ns\": 3906766,\n    \"avg_ts\": 2.241365,\n    \"stddev_ts\": 0.000038,\n    \"samples_ns\": [ 228429586638, 228430381634, 228436664642 ],\n    \"samples_ts\": [ 2.24139, 2.24138, 2.24132 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T02:33:04Z\",\n    \"avg_ns\": 72966867247,\n    \"stddev_ns\": 680893371,\n    \"avg_ts\": 1.754322,\n    \"stddev_ts\": 0.016283,\n    \"samples_ns\": [ 72576871011, 72570643823, 73753086908 ],\n    \"samples_ts\": [ 1.76365, 1.7638, 1.73552 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T02:17:50Z",
+          "avg_ns": 228432210971,
+          "stddev_ns": 3906766,
+          "avg_ts": 2.241365,
+          "stddev_ts": 3.8e-05,
+          "samples_ns": [
+            228429586638,
+            228430381634,
+            228436664642
+          ],
+          "samples_ts": [
+            2.24139,
+            2.24138,
+            2.24132
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T02:33:04Z",
+          "avg_ns": 72966867247,
+          "stddev_ns": 680893371,
+          "avg_ts": 1.754322,
+          "stddev_ts": 0.016283,
+          "samples_ns": [
+            72576871011,
+            72570643823,
+            73753086908
+          ],
+          "samples_ts": [
+            1.76365,
+            1.7638,
+            1.73552
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1374
+    },
+    {
+      "timestamp_utc": "2025-12-14T03:07:05.447367+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T02:36:45Z\",\n    \"avg_ns\": 228394757986,\n    \"stddev_ns\": 6165049,\n    \"avg_ts\": 2.241733,\n    \"stddev_ts\": 0.000060,\n    \"samples_ns\": [ 228399009118, 228387710495, 228397554346 ],\n    \"samples_ts\": [ 2.24169, 2.2418, 2.24171 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T02:51:58Z\",\n    \"avg_ns\": 301929426390,\n    \"stddev_ns\": 3929310093,\n    \"avg_ts\": 1.695953,\n    \"stddev_ts\": 0.022238,\n    \"samples_ns\": [ 297392377150, 304168523910, 304227378110 ],\n    \"samples_ts\": [ 1.72163, 1.68328, 1.68295 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T02:36:45Z",
+          "avg_ns": 228394757986,
+          "stddev_ns": 6165049,
+          "avg_ts": 2.241733,
+          "stddev_ts": 6e-05,
+          "samples_ns": [
+            228399009118,
+            228387710495,
+            228397554346
+          ],
+          "samples_ts": [
+            2.24169,
+            2.2418,
+            2.24171
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T02:51:58Z",
+          "avg_ns": 301929426390,
+          "stddev_ns": 3929310093,
+          "avg_ts": 1.695953,
+          "stddev_ts": 0.022238,
+          "samples_ns": [
+            297392377150,
+            304168523910,
+            304227378110
+          ],
+          "samples_ts": [
+            1.72163,
+            1.68328,
+            1.68295
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1375
+    },
+    {
+      "timestamp_utc": "2025-12-14T03:14:34.892359+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:07:06Z\",\n    \"avg_ns\": 56791944996,\n    \"stddev_ns\": 873286,\n    \"avg_ts\": 2.253841,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 56791029635, 56792680926, 56792124428 ],\n    \"samples_ts\": [ 2.25388, 2.25381, 2.25383 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:10:53Z\",\n    \"avg_ns\": 73431916442,\n    \"stddev_ns\": 3348872440,\n    \"avg_ts\": 1.743536,\n    \"stddev_ts\": 0.033142,\n    \"samples_ns\": [ 72574626942, 72660408922, 75060713464 ],\n    \"samples_ts\": [ 1.7637, 1.76162, 1.70529 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:07:06Z",
+          "avg_ns": 56791944996,
+          "stddev_ns": 873286,
+          "avg_ts": 2.253841,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            56791029635,
+            56792680926,
+            56792124428
+          ],
+          "samples_ts": [
+            2.25388,
+            2.25381,
+            2.25383
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:10:53Z",
+          "avg_ns": 73431916442,
+          "stddev_ns": 3348872440,
+          "avg_ts": 1.743536,
+          "stddev_ts": 0.033142,
+          "samples_ns": [
+            72574626942,
+            72660408922,
+            75060713464
+          ],
+          "samples_ts": [
+            1.7637,
+            1.76162,
+            1.70529
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1376
+    },
+    {
+      "timestamp_utc": "2025-12-14T03:33:31.696744+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:14:36Z\",\n    \"avg_ns\": 56780383837,\n    \"stddev_ns\": 1783251,\n    \"avg_ts\": 2.254300,\n    \"stddev_ts\": 0.000070,\n    \"samples_ns\": [ 56778614908, 56782149422, 56780387182 ],\n    \"samples_ts\": [ 2.25437, 2.25423, 2.2543 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:18:23Z\",\n    \"avg_ns\": 302566236278,\n    \"stddev_ns\": 1189039714,\n    \"avg_ts\": 1.692323,\n    \"stddev_ts\": 0.018356,\n    \"samples_ns\": [ 298800218677, 304444690171, 304453799987 ],\n    \"samples_ts\": [ 1.71352, 1.68175, 1.6817 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:14:36Z",
+          "avg_ns": 56780383837,
+          "stddev_ns": 1783251,
+          "avg_ts": 2.2543,
+          "stddev_ts": 7e-05,
+          "samples_ns": [
+            56778614908,
+            56782149422,
+            56780387182
+          ],
+          "samples_ts": [
+            2.25437,
+            2.25423,
+            2.2543
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:18:23Z",
+          "avg_ns": 302566236278,
+          "stddev_ns": 1189039714,
+          "avg_ts": 1.692323,
+          "stddev_ts": 0.018356,
+          "samples_ns": [
+            298800218677,
+            304444690171,
+            304453799987
+          ],
+          "samples_ts": [
+            1.71352,
+            1.68175,
+            1.6817
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1377
+    },
+    {
+      "timestamp_utc": "2025-12-14T03:52:30.973330+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:33:32Z\",\n    \"avg_ns\": 229530496896,\n    \"stddev_ns\": 5497061,\n    \"avg_ts\": 2.230640,\n    \"stddev_ts\": 0.000053,\n    \"samples_ns\": [ 229525184273, 229536161481, 229530144934 ],\n    \"samples_ts\": [ 2.23069, 2.23059, 2.23064 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:48:51Z\",\n    \"avg_ns\": 73055604556,\n    \"stddev_ns\": 3113920790,\n    \"avg_ts\": 1.752193,\n    \"stddev_ts\": 0.016408,\n    \"samples_ns\": [ 72658112378, 72658841994, 73849859297 ],\n    \"samples_ts\": [ 1.76168, 1.76166, 1.73325 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:33:32Z",
+          "avg_ns": 229530496896,
+          "stddev_ns": 5497061,
+          "avg_ts": 2.23064,
+          "stddev_ts": 5.3e-05,
+          "samples_ns": [
+            229525184273,
+            229536161481,
+            229530144934
+          ],
+          "samples_ts": [
+            2.23069,
+            2.23059,
+            2.23064
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:48:51Z",
+          "avg_ns": 73055604556,
+          "stddev_ns": 3113920790,
+          "avg_ts": 1.752193,
+          "stddev_ts": 0.016408,
+          "samples_ns": [
+            72658112378,
+            72658841994,
+            73849859297
+          ],
+          "samples_ts": [
+            1.76168,
+            1.76166,
+            1.73325
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1378
+    },
+    {
+      "timestamp_utc": "2025-12-14T04:22:56.703961+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T03:52:32Z\",\n    \"avg_ns\": 229556676284,\n    \"stddev_ns\": 4342962,\n    \"avg_ts\": 2.230386,\n    \"stddev_ts\": 0.000042,\n    \"samples_ns\": [ 229561197457, 229552663139, 229556168258 ],\n    \"samples_ts\": [ 2.23034, 2.23043, 2.23039 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:07:50Z\",\n    \"avg_ns\": 301834118645,\n    \"stddev_ns\": 2242004709,\n    \"avg_ts\": 1.696474,\n    \"stddev_ts\": 0.021371,\n    \"samples_ns\": [ 297475226314, 304013269099, 304013860523 ],\n    \"samples_ts\": [ 1.72115, 1.68414, 1.68413 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T03:52:32Z",
+          "avg_ns": 229556676284,
+          "stddev_ns": 4342962,
+          "avg_ts": 2.230386,
+          "stddev_ts": 4.2e-05,
+          "samples_ns": [
+            229561197457,
+            229552663139,
+            229556168258
+          ],
+          "samples_ts": [
+            2.23034,
+            2.23043,
+            2.23039
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:07:50Z",
+          "avg_ns": 301834118645,
+          "stddev_ns": 2242004709,
+          "avg_ts": 1.696474,
+          "stddev_ts": 0.021371,
+          "samples_ns": [
+            297475226314,
+            304013269099,
+            304013860523
+          ],
+          "samples_ts": [
+            1.72115,
+            1.68414,
+            1.68413
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1379
+    },
+    {
+      "timestamp_utc": "2025-12-14T04:30:25.774389+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:22:57Z\",\n    \"avg_ns\": 56788971169,\n    \"stddev_ns\": 652727,\n    \"avg_ts\": 2.253959,\n    \"stddev_ts\": 0.000026,\n    \"samples_ns\": [ 56788285575, 56789585120, 56789042812 ],\n    \"samples_ts\": [ 2.25399, 2.25393, 2.25396 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:26:45Z\",\n    \"avg_ns\": 73315896706,\n    \"stddev_ns\": 1163378857,\n    \"avg_ts\": 1.746160,\n    \"stddev_ts\": 0.027457,\n    \"samples_ns\": [ 72642178003, 72646263286, 74659248829 ],\n    \"samples_ts\": [ 1.76206, 1.76196, 1.71446 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:22:57Z",
+          "avg_ns": 56788971169,
+          "stddev_ns": 652727,
+          "avg_ts": 2.253959,
+          "stddev_ts": 2.6e-05,
+          "samples_ns": [
+            56788285575,
+            56789585120,
+            56789042812
+          ],
+          "samples_ts": [
+            2.25399,
+            2.25393,
+            2.25396
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:26:45Z",
+          "avg_ns": 73315896706,
+          "stddev_ns": 1163378857,
+          "avg_ts": 1.74616,
+          "stddev_ts": 0.027457,
+          "samples_ns": [
+            72642178003,
+            72646263286,
+            74659248829
+          ],
+          "samples_ts": [
+            1.76206,
+            1.76196,
+            1.71446
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1380
+    },
+    {
+      "timestamp_utc": "2025-12-14T04:49:22.644262+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:30:27Z\",\n    \"avg_ns\": 56784501868,\n    \"stddev_ns\": 2052164,\n    \"avg_ts\": 2.254136,\n    \"stddev_ts\": 0.000081,\n    \"samples_ns\": [ 56785828953, 56782154958, 56785521694 ],\n    \"samples_ts\": [ 2.25408, 2.25423, 2.2541 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:34:14Z\",\n    \"avg_ns\": 302577219228,\n    \"stddev_ns\": 4202925026,\n    \"avg_ts\": 1.692235,\n    \"stddev_ts\": 0.016340,\n    \"samples_ns\": [ 299222449455, 304273220505, 304235987724 ],\n    \"samples_ts\": [ 1.7111, 1.6827, 1.6829 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:30:27Z",
+          "avg_ns": 56784501868,
+          "stddev_ns": 2052164,
+          "avg_ts": 2.254136,
+          "stddev_ts": 8.1e-05,
+          "samples_ns": [
+            56785828953,
+            56782154958,
+            56785521694
+          ],
+          "samples_ts": [
+            2.25408,
+            2.25423,
+            2.2541
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:34:14Z",
+          "avg_ns": 302577219228,
+          "stddev_ns": 4202925026,
+          "avg_ts": 1.692235,
+          "stddev_ts": 0.01634,
+          "samples_ns": [
+            299222449455,
+            304273220505,
+            304235987724
+          ],
+          "samples_ts": [
+            1.7111,
+            1.6827,
+            1.6829
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1381
+    },
+    {
+      "timestamp_utc": "2025-12-14T05:08:16.594364+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T04:49:23Z\",\n    \"avg_ns\": 227848339311,\n    \"stddev_ns\": 4236608,\n    \"avg_ts\": 2.247109,\n    \"stddev_ts\": 0.000042,\n    \"samples_ns\": [ 227850442649, 227843492516, 227851082769 ],\n    \"samples_ts\": [ 2.24709, 2.24716, 2.24708 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:04:35Z\",\n    \"avg_ns\": 73516208721,\n    \"stddev_ns\": 3310096619,\n    \"avg_ts\": 1.741481,\n    \"stddev_ts\": 0.030880,\n    \"samples_ns\": [ 72640753706, 72877576005, 75030296454 ],\n    \"samples_ts\": [ 1.7621, 1.75637, 1.70598 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T04:49:23Z",
+          "avg_ns": 227848339311,
+          "stddev_ns": 4236608,
+          "avg_ts": 2.247109,
+          "stddev_ts": 4.2e-05,
+          "samples_ns": [
+            227850442649,
+            227843492516,
+            227851082769
+          ],
+          "samples_ts": [
+            2.24709,
+            2.24716,
+            2.24708
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:04:35Z",
+          "avg_ns": 73516208721,
+          "stddev_ns": 3310096619,
+          "avg_ts": 1.741481,
+          "stddev_ts": 0.03088,
+          "samples_ns": [
+            72640753706,
+            72877576005,
+            75030296454
+          ],
+          "samples_ts": [
+            1.7621,
+            1.75637,
+            1.70598
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1382
+    },
+    {
+      "timestamp_utc": "2025-12-14T05:38:38.660462+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:08:17Z\",\n    \"avg_ns\": 227895657503,\n    \"stddev_ns\": 12368354,\n    \"avg_ts\": 2.246642,\n    \"stddev_ts\": 0.000122,\n    \"samples_ns\": [ 227908885349, 227893657526, 227884429636 ],\n    \"samples_ts\": [ 2.24651, 2.24666, 2.24675 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:23:29Z\",\n    \"avg_ns\": 302826750950,\n    \"stddev_ns\": 4113727510,\n    \"avg_ts\": 1.690831,\n    \"stddev_ts\": 0.015575,\n    \"samples_ns\": [ 299622716789, 304426356545, 304431179516 ],\n    \"samples_ts\": [ 1.70882, 1.68185, 1.68183 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:08:17Z",
+          "avg_ns": 227895657503,
+          "stddev_ns": 12368354,
+          "avg_ts": 2.246642,
+          "stddev_ts": 0.000122,
+          "samples_ns": [
+            227908885349,
+            227893657526,
+            227884429636
+          ],
+          "samples_ts": [
+            2.24651,
+            2.24666,
+            2.24675
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:23:29Z",
+          "avg_ns": 302826750950,
+          "stddev_ns": 4113727510,
+          "avg_ts": 1.690831,
+          "stddev_ts": 0.015575,
+          "samples_ns": [
+            299622716789,
+            304426356545,
+            304431179516
+          ],
+          "samples_ts": [
+            1.70882,
+            1.68185,
+            1.68183
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1383
+    },
+    {
+      "timestamp_utc": "2025-12-14T05:46:09.823133+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:38:39Z\",\n    \"avg_ns\": 56790470081,\n    \"stddev_ns\": 1128451,\n    \"avg_ts\": 2.253899,\n    \"stddev_ts\": 0.000045,\n    \"samples_ns\": [ 56791770333, 56789893512, 56789746398 ],\n    \"samples_ts\": [ 2.25385, 2.25392, 2.25393 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:42:27Z\",\n    \"avg_ns\": 73999862890,\n    \"stddev_ns\": 1255618952,\n    \"avg_ts\": 1.730066,\n    \"stddev_ts\": 0.029451,\n    \"samples_ns\": [ 72673512806, 74155913149, 75170162715 ],\n    \"samples_ts\": [ 1.7613, 1.72609, 1.7028 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:38:39Z",
+          "avg_ns": 56790470081,
+          "stddev_ns": 1128451,
+          "avg_ts": 2.253899,
+          "stddev_ts": 4.5e-05,
+          "samples_ns": [
+            56791770333,
+            56789893512,
+            56789746398
+          ],
+          "samples_ts": [
+            2.25385,
+            2.25392,
+            2.25393
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:42:27Z",
+          "avg_ns": 73999862890,
+          "stddev_ns": 1255618952,
+          "avg_ts": 1.730066,
+          "stddev_ts": 0.029451,
+          "samples_ns": [
+            72673512806,
+            74155913149,
+            75170162715
+          ],
+          "samples_ts": [
+            1.7613,
+            1.72609,
+            1.7028
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1384
+    },
+    {
+      "timestamp_utc": "2025-12-14T06:05:07.970346+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:46:11Z\",\n    \"avg_ns\": 56800766371,\n    \"stddev_ns\": 5303767,\n    \"avg_ts\": 2.253491,\n    \"stddev_ts\": 0.000210,\n    \"samples_ns\": [ 56795424126, 56800855158, 56806019830 ],\n    \"samples_ts\": [ 2.2537, 2.25349, 2.25328 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T05:49:58Z\",\n    \"avg_ns\": 302968818908,\n    \"stddev_ns\": 3954598232,\n    \"avg_ts\": 1.690022,\n    \"stddev_ts\": 0.014197,\n    \"samples_ns\": [ 300044118700, 304422739530, 304439598496 ],\n    \"samples_ts\": [ 1.70642, 1.68187, 1.68178 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:46:11Z",
+          "avg_ns": 56800766371,
+          "stddev_ns": 5303767,
+          "avg_ts": 2.253491,
+          "stddev_ts": 0.00021,
+          "samples_ns": [
+            56795424126,
+            56800855158,
+            56806019830
+          ],
+          "samples_ts": [
+            2.2537,
+            2.25349,
+            2.25328
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T05:49:58Z",
+          "avg_ns": 302968818908,
+          "stddev_ns": 3954598232,
+          "avg_ts": 1.690022,
+          "stddev_ts": 0.014197,
+          "samples_ns": [
+            300044118700,
+            304422739530,
+            304439598496
+          ],
+          "samples_ts": [
+            1.70642,
+            1.68187,
+            1.68178
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1385
+    },
+    {
+      "timestamp_utc": "2025-12-14T06:24:04.573116+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:05:09Z\",\n    \"avg_ns\": 228399829996,\n    \"stddev_ns\": 5686021,\n    \"avg_ts\": 2.241683,\n    \"stddev_ts\": 0.000056,\n    \"samples_ns\": [ 228401909768, 228393396897, 228404183323 ],\n    \"samples_ts\": [ 2.24166, 2.24175, 2.24164 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:20:22Z\",\n    \"avg_ns\": 73665981332,\n    \"stddev_ns\": 1292697719,\n    \"avg_ts\": 1.737927,\n    \"stddev_ts\": 0.030291,\n    \"samples_ns\": [ 72598283111, 73296457527, 75103203360 ],\n    \"samples_ts\": [ 1.76313, 1.74633, 1.70432 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:05:09Z",
+          "avg_ns": 228399829996,
+          "stddev_ns": 5686021,
+          "avg_ts": 2.241683,
+          "stddev_ts": 5.6e-05,
+          "samples_ns": [
+            228401909768,
+            228393396897,
+            228404183323
+          ],
+          "samples_ts": [
+            2.24166,
+            2.24175,
+            2.24164
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:20:22Z",
+          "avg_ns": 73665981332,
+          "stddev_ns": 1292697719,
+          "avg_ts": 1.737927,
+          "stddev_ts": 0.030291,
+          "samples_ns": [
+            72598283111,
+            73296457527,
+            75103203360
+          ],
+          "samples_ts": [
+            1.76313,
+            1.74633,
+            1.70432
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1386
+    },
+    {
+      "timestamp_utc": "2025-12-14T06:54:28.486124+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:24:05Z\",\n    \"avg_ns\": 228363612011,\n    \"stddev_ns\": 8038215,\n    \"avg_ts\": 2.242038,\n    \"stddev_ts\": 0.000079,\n    \"samples_ns\": [ 228354468868, 228366975953, 228369391214 ],\n    \"samples_ts\": [ 2.24213, 2.24201, 2.24198 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:39:19Z\",\n    \"avg_ns\": 302810214071,\n    \"stddev_ns\": 2553125817,\n    \"avg_ts\": 1.690909,\n    \"stddev_ts\": 0.014327,\n    \"samples_ns\": [ 299862121835, 304288166904, 304280353475 ],\n    \"samples_ts\": [ 1.70745, 1.68262, 1.68266 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:24:05Z",
+          "avg_ns": 228363612011,
+          "stddev_ns": 8038215,
+          "avg_ts": 2.242038,
+          "stddev_ts": 7.9e-05,
+          "samples_ns": [
+            228354468868,
+            228366975953,
+            228369391214
+          ],
+          "samples_ts": [
+            2.24213,
+            2.24201,
+            2.24198
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:39:19Z",
+          "avg_ns": 302810214071,
+          "stddev_ns": 2553125817,
+          "avg_ts": 1.690909,
+          "stddev_ts": 0.014327,
+          "samples_ns": [
+            299862121835,
+            304288166904,
+            304280353475
+          ],
+          "samples_ts": [
+            1.70745,
+            1.68262,
+            1.68266
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1387
+    },
+    {
+      "timestamp_utc": "2025-12-14T07:01:57.373982+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:54:29Z\",\n    \"avg_ns\": 56791748394,\n    \"stddev_ns\": 1803454,\n    \"avg_ts\": 2.253849,\n    \"stddev_ts\": 0.000072,\n    \"samples_ns\": [ 56789888050, 56793488991, 56791868141 ],\n    \"samples_ts\": [ 2.25392, 2.25378, 2.25384 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T06:58:16Z\",\n    \"avg_ns\": 73256201827,\n    \"stddev_ns\": 1090713284,\n    \"avg_ts\": 1.747548,\n    \"stddev_ts\": 0.025798,\n    \"samples_ns\": [ 72610365029, 72642730034, 74515510418 ],\n    \"samples_ts\": [ 1.76283, 1.76205, 1.71776 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:54:29Z",
+          "avg_ns": 56791748394,
+          "stddev_ns": 1803454,
+          "avg_ts": 2.253849,
+          "stddev_ts": 7.2e-05,
+          "samples_ns": [
+            56789888050,
+            56793488991,
+            56791868141
+          ],
+          "samples_ts": [
+            2.25392,
+            2.25378,
+            2.25384
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T06:58:16Z",
+          "avg_ns": 73256201827,
+          "stddev_ns": 1090713284,
+          "avg_ts": 1.747548,
+          "stddev_ts": 0.025798,
+          "samples_ns": [
+            72610365029,
+            72642730034,
+            74515510418
+          ],
+          "samples_ts": [
+            1.76283,
+            1.76205,
+            1.71776
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1388
+    },
+    {
+      "timestamp_utc": "2025-12-14T07:20:52.474816+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:01:58Z\",\n    \"avg_ns\": 56783261179,\n    \"stddev_ns\": 1567923,\n    \"avg_ts\": 2.254185,\n    \"stddev_ts\": 0.000061,\n    \"samples_ns\": [ 56783299403, 56781711146, 56784772990 ],\n    \"samples_ts\": [ 2.25418, 2.25425, 2.25413 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:05:45Z\",\n    \"avg_ns\": 301991560945,\n    \"stddev_ns\": 3688973646,\n    \"avg_ts\": 1.695581,\n    \"stddev_ts\": 0.020859,\n    \"samples_ns\": [ 297732064952, 304088951873, 304153666010 ],\n    \"samples_ts\": [ 1.71967, 1.68372, 1.68336 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:01:58Z",
+          "avg_ns": 56783261179,
+          "stddev_ns": 1567923,
+          "avg_ts": 2.254185,
+          "stddev_ts": 6.1e-05,
+          "samples_ns": [
+            56783299403,
+            56781711146,
+            56784772990
+          ],
+          "samples_ts": [
+            2.25418,
+            2.25425,
+            2.25413
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:05:45Z",
+          "avg_ns": 301991560945,
+          "stddev_ns": 3688973646,
+          "avg_ts": 1.695581,
+          "stddev_ts": 0.020859,
+          "samples_ns": [
+            297732064952,
+            304088951873,
+            304153666010
+          ],
+          "samples_ts": [
+            1.71967,
+            1.68372,
+            1.68336
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1389
+    },
+    {
+      "timestamp_utc": "2025-12-14T07:39:51.214939+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:20:53Z\",\n    \"avg_ns\": 229431557340,\n    \"stddev_ns\": 152446986,\n    \"avg_ts\": 2.231603,\n    \"stddev_ts\": 0.001483,\n    \"samples_ns\": [ 229255527361, 229519982967, 229519161692 ],\n    \"samples_ts\": [ 2.23332, 2.23074, 2.23075 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:36:11Z\",\n    \"avg_ns\": 72981969825,\n    \"stddev_ns\": 511553427,\n    \"avg_ts\": 1.753915,\n    \"stddev_ts\": 0.012244,\n    \"samples_ns\": [ 72692135301, 72681147563, 73572626613 ],\n    \"samples_ts\": [ 1.76085, 1.76112, 1.73978 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:20:53Z",
+          "avg_ns": 229431557340,
+          "stddev_ns": 152446986,
+          "avg_ts": 2.231603,
+          "stddev_ts": 0.001483,
+          "samples_ns": [
+            229255527361,
+            229519982967,
+            229519161692
+          ],
+          "samples_ts": [
+            2.23332,
+            2.23074,
+            2.23075
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:36:11Z",
+          "avg_ns": 72981969825,
+          "stddev_ns": 511553427,
+          "avg_ts": 1.753915,
+          "stddev_ts": 0.012244,
+          "samples_ns": [
+            72692135301,
+            72681147563,
+            73572626613
+          ],
+          "samples_ts": [
+            1.76085,
+            1.76112,
+            1.73978
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1390
+    },
+    {
+      "timestamp_utc": "2025-12-14T08:10:16.343833+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:39:52Z\",\n    \"avg_ns\": 229560573790,\n    \"stddev_ns\": 4582457,\n    \"avg_ts\": 2.230348,\n    \"stddev_ts\": 0.000044,\n    \"samples_ns\": [ 229555312246, 229563284988, 229563124137 ],\n    \"samples_ts\": [ 2.2304, 2.23032, 2.23032 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T07:55:10Z\",\n    \"avg_ns\": 301624696817,\n    \"stddev_ns\": 1101346445,\n    \"avg_ts\": 1.697720,\n    \"stddev_ts\": 0.025170,\n    \"samples_ns\": [ 296504839475, 304182877333, 304186373645 ],\n    \"samples_ts\": [ 1.72678, 1.6832, 1.68318 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:39:52Z",
+          "avg_ns": 229560573790,
+          "stddev_ns": 4582457,
+          "avg_ts": 2.230348,
+          "stddev_ts": 4.4e-05,
+          "samples_ns": [
+            229555312246,
+            229563284988,
+            229563124137
+          ],
+          "samples_ts": [
+            2.2304,
+            2.23032,
+            2.23032
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T07:55:10Z",
+          "avg_ns": 301624696817,
+          "stddev_ns": 1101346445,
+          "avg_ts": 1.69772,
+          "stddev_ts": 0.02517,
+          "samples_ns": [
+            296504839475,
+            304182877333,
+            304186373645
+          ],
+          "samples_ts": [
+            1.72678,
+            1.6832,
+            1.68318
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1391
+    },
+    {
+      "timestamp_utc": "2025-12-14T08:17:43.562325+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:10:17Z\",\n    \"avg_ns\": 56806008548,\n    \"stddev_ns\": 2113610,\n    \"avg_ts\": 2.253283,\n    \"stddev_ts\": 0.000083,\n    \"samples_ns\": [ 56807135082, 56803585514, 56807305049 ],\n    \"samples_ts\": [ 2.25324, 2.25338, 2.25323 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:14:04Z\",\n    \"avg_ns\": 72671773288,\n    \"stddev_ns\": 197558268,\n    \"avg_ts\": 1.761353,\n    \"stddev_ts\": 0.004781,\n    \"samples_ns\": [ 72561984419, 72553494400, 72899841046 ],\n    \"samples_ts\": [ 1.76401, 1.76422, 1.75583 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:10:17Z",
+          "avg_ns": 56806008548,
+          "stddev_ns": 2113610,
+          "avg_ts": 2.253283,
+          "stddev_ts": 8.3e-05,
+          "samples_ns": [
+            56807135082,
+            56803585514,
+            56807305049
+          ],
+          "samples_ts": [
+            2.25324,
+            2.25338,
+            2.25323
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:14:04Z",
+          "avg_ns": 72671773288,
+          "stddev_ns": 197558268,
+          "avg_ts": 1.761353,
+          "stddev_ts": 0.004781,
+          "samples_ns": [
+            72561984419,
+            72553494400,
+            72899841046
+          ],
+          "samples_ts": [
+            1.76401,
+            1.76422,
+            1.75583
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1392
+    },
+    {
+      "timestamp_utc": "2025-12-14T08:36:37.215710+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:17:44Z\",\n    \"avg_ns\": 56781843300,\n    \"stddev_ns\": 1840910,\n    \"avg_ts\": 2.254242,\n    \"stddev_ts\": 0.000072,\n    \"samples_ns\": [ 56783359175, 56782353733, 56779816993 ],\n    \"samples_ts\": [ 2.25418, 2.25422, 2.25432 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:21:31Z\",\n    \"avg_ns\": 301561190854,\n    \"stddev_ns\": 1972214947,\n    \"avg_ts\": 1.698112,\n    \"stddev_ts\": 0.026856,\n    \"samples_ns\": [ 296103919387, 304285706367, 304293946808 ],\n    \"samples_ts\": [ 1.72912, 1.68263, 1.68258 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:17:44Z",
+          "avg_ns": 56781843300,
+          "stddev_ns": 1840910,
+          "avg_ts": 2.254242,
+          "stddev_ts": 7.2e-05,
+          "samples_ns": [
+            56783359175,
+            56782353733,
+            56779816993
+          ],
+          "samples_ts": [
+            2.25418,
+            2.25422,
+            2.25432
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:21:31Z",
+          "avg_ns": 301561190854,
+          "stddev_ns": 1972214947,
+          "avg_ts": 1.698112,
+          "stddev_ts": 0.026856,
+          "samples_ns": [
+            296103919387,
+            304285706367,
+            304293946808
+          ],
+          "samples_ts": [
+            1.72912,
+            1.68263,
+            1.68258
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1393
+    },
+    {
+      "timestamp_utc": "2025-12-14T08:55:29.245485+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:36:38Z\",\n    \"avg_ns\": 228010988156,\n    \"stddev_ns\": 9592636,\n    \"avg_ts\": 2.245506,\n    \"stddev_ts\": 0.000094,\n    \"samples_ns\": [ 228019110523, 228013414116, 228000439831 ],\n    \"samples_ts\": [ 2.24543, 2.24548, 2.24561 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:51:50Z\",\n    \"avg_ns\": 72651861319,\n    \"stddev_ns\": 28335365,\n    \"avg_ts\": 1.761827,\n    \"stddev_ts\": 0.000687,\n    \"samples_ns\": [ 72644790935, 72627733271, 72683059753 ],\n    \"samples_ts\": [ 1.762, 1.76241, 1.76107 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:36:38Z",
+          "avg_ns": 228010988156,
+          "stddev_ns": 9592636,
+          "avg_ts": 2.245506,
+          "stddev_ts": 9.4e-05,
+          "samples_ns": [
+            228019110523,
+            228013414116,
+            228000439831
+          ],
+          "samples_ts": [
+            2.24543,
+            2.24548,
+            2.24561
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:51:50Z",
+          "avg_ns": 72651861319,
+          "stddev_ns": 28335365,
+          "avg_ts": 1.761827,
+          "stddev_ts": 0.000687,
+          "samples_ns": [
+            72644790935,
+            72627733271,
+            72683059753
+          ],
+          "samples_ts": [
+            1.762,
+            1.76241,
+            1.76107
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1394
+    },
+    {
+      "timestamp_utc": "2025-12-14T09:25:46.197847+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T08:55:30Z\",\n    \"avg_ns\": 227809010829,\n    \"stddev_ns\": 9499739,\n    \"avg_ts\": 2.247497,\n    \"stddev_ts\": 0.000094,\n    \"samples_ns\": [ 227798650537, 227817275696, 227811106255 ],\n    \"samples_ts\": [ 2.2476, 2.24742, 2.24748 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:10:41Z\",\n    \"avg_ns\": 301234227819,\n    \"stddev_ns\": 4154477490,\n    \"avg_ts\": 1.700008,\n    \"stddev_ts\": 0.029331,\n    \"samples_ns\": [ 295292808659, 304292736747, 304117138052 ],\n    \"samples_ts\": [ 1.73387, 1.68259, 1.68356 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T08:55:30Z",
+          "avg_ns": 227809010829,
+          "stddev_ns": 9499739,
+          "avg_ts": 2.247497,
+          "stddev_ts": 9.4e-05,
+          "samples_ns": [
+            227798650537,
+            227817275696,
+            227811106255
+          ],
+          "samples_ts": [
+            2.2476,
+            2.24742,
+            2.24748
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:10:41Z",
+          "avg_ns": 301234227819,
+          "stddev_ns": 4154477490,
+          "avg_ts": 1.700008,
+          "stddev_ts": 0.029331,
+          "samples_ns": [
+            295292808659,
+            304292736747,
+            304117138052
+          ],
+          "samples_ts": [
+            1.73387,
+            1.68259,
+            1.68356
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1395
+    },
+    {
+      "timestamp_utc": "2025-12-14T09:33:14.244994+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:25:47Z\",\n    \"avg_ns\": 56785759814,\n    \"stddev_ns\": 314635,\n    \"avg_ts\": 2.254086,\n    \"stddev_ts\": 0.000012,\n    \"samples_ns\": [ 56785469722, 56785715439, 56786094281 ],\n    \"samples_ts\": [ 2.2541, 2.25409, 2.25407 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:29:34Z\",\n    \"avg_ns\": 72969603496,\n    \"stddev_ns\": 534483824,\n    \"avg_ts\": 1.754218,\n    \"stddev_ts\": 0.012795,\n    \"samples_ns\": [ 72662915587, 72659126601, 73586768301 ],\n    \"samples_ts\": [ 1.76156, 1.76165, 1.73944 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:25:47Z",
+          "avg_ns": 56785759814,
+          "stddev_ns": 314635,
+          "avg_ts": 2.254086,
+          "stddev_ts": 1.2e-05,
+          "samples_ns": [
+            56785469722,
+            56785715439,
+            56786094281
+          ],
+          "samples_ts": [
+            2.2541,
+            2.25409,
+            2.25407
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:29:34Z",
+          "avg_ns": 72969603496,
+          "stddev_ns": 534483824,
+          "avg_ts": 1.754218,
+          "stddev_ts": 0.012795,
+          "samples_ns": [
+            72662915587,
+            72659126601,
+            73586768301
+          ],
+          "samples_ts": [
+            1.76156,
+            1.76165,
+            1.73944
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1396
+    },
+    {
+      "timestamp_utc": "2025-12-14T09:52:08.965191+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:33:15Z\",\n    \"avg_ns\": 56763657874,\n    \"stddev_ns\": 5096205,\n    \"avg_ts\": 2.254964,\n    \"stddev_ts\": 0.000202,\n    \"samples_ns\": [ 56767230482, 56765921096, 56757822044 ],\n    \"samples_ts\": [ 2.25482, 2.25487, 2.2552 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:37:02Z\",\n    \"avg_ns\": 301891406553,\n    \"stddev_ns\": 4041523848,\n    \"avg_ts\": 1.696178,\n    \"stddev_ts\": 0.022884,\n    \"samples_ns\": [ 297224657124, 304223465576, 304226096961 ],\n    \"samples_ts\": [ 1.7226, 1.68297, 1.68296 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:33:15Z",
+          "avg_ns": 56763657874,
+          "stddev_ns": 5096205,
+          "avg_ts": 2.254964,
+          "stddev_ts": 0.000202,
+          "samples_ns": [
+            56767230482,
+            56765921096,
+            56757822044
+          ],
+          "samples_ts": [
+            2.25482,
+            2.25487,
+            2.2552
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:37:02Z",
+          "avg_ns": 301891406553,
+          "stddev_ns": 4041523848,
+          "avg_ts": 1.696178,
+          "stddev_ts": 0.022884,
+          "samples_ns": [
+            297224657124,
+            304223465576,
+            304226096961
+          ],
+          "samples_ts": [
+            1.7226,
+            1.68297,
+            1.68296
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1397
+    },
+    {
+      "timestamp_utc": "2025-12-14T10:11:02.581045+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T09:52:10Z\",\n    \"avg_ns\": 228356576929,\n    \"stddev_ns\": 10920963,\n    \"avg_ts\": 2.242108,\n    \"stddev_ts\": 0.000107,\n    \"samples_ns\": [ 228355453751, 228346271461, 228368005576 ],\n    \"samples_ts\": [ 2.24212, 2.24221, 2.242 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:07:23Z\",\n    \"avg_ns\": 72721843514,\n    \"stddev_ns\": 86169022,\n    \"avg_ts\": 1.760133,\n    \"stddev_ts\": 0.002084,\n    \"samples_ns\": [ 72673913766, 72670296256, 72821320521 ],\n    \"samples_ts\": [ 1.76129, 1.76138, 1.75773 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T09:52:10Z",
+          "avg_ns": 228356576929,
+          "stddev_ns": 10920963,
+          "avg_ts": 2.242108,
+          "stddev_ts": 0.000107,
+          "samples_ns": [
+            228355453751,
+            228346271461,
+            228368005576
+          ],
+          "samples_ts": [
+            2.24212,
+            2.24221,
+            2.242
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:07:23Z",
+          "avg_ns": 72721843514,
+          "stddev_ns": 86169022,
+          "avg_ts": 1.760133,
+          "stddev_ts": 0.002084,
+          "samples_ns": [
+            72673913766,
+            72670296256,
+            72821320521
+          ],
+          "samples_ts": [
+            1.76129,
+            1.76138,
+            1.75773
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1398
+    },
+    {
+      "timestamp_utc": "2025-12-14T10:41:22.376248+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:11:03Z\",\n    \"avg_ns\": 228376035081,\n    \"stddev_ns\": 3362894,\n    \"avg_ts\": 2.241916,\n    \"stddev_ts\": 0.000033,\n    \"samples_ns\": [ 228374134562, 228379917930, 228374052751 ],\n    \"samples_ts\": [ 2.24194, 2.24188, 2.24194 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:26:17Z\",\n    \"avg_ns\": 301424510275,\n    \"stddev_ns\": 1762982509,\n    \"avg_ts\": 1.698872,\n    \"stddev_ts\": 0.026402,\n    \"samples_ns\": [ 296063581113, 304118402211, 304091547501 ],\n    \"samples_ts\": [ 1.72936, 1.68355, 1.6837 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:11:03Z",
+          "avg_ns": 228376035081,
+          "stddev_ns": 3362894,
+          "avg_ts": 2.241916,
+          "stddev_ts": 3.3e-05,
+          "samples_ns": [
+            228374134562,
+            228379917930,
+            228374052751
+          ],
+          "samples_ts": [
+            2.24194,
+            2.24188,
+            2.24194
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:26:17Z",
+          "avg_ns": 301424510275,
+          "stddev_ns": 1762982509,
+          "avg_ts": 1.698872,
+          "stddev_ts": 0.026402,
+          "samples_ns": [
+            296063581113,
+            304118402211,
+            304091547501
+          ],
+          "samples_ts": [
+            1.72936,
+            1.68355,
+            1.6837
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1399
+    },
+    {
+      "timestamp_utc": "2025-12-14T10:48:49.570735+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:41:23Z\",\n    \"avg_ns\": 56799577023,\n    \"stddev_ns\": 678496,\n    \"avg_ts\": 2.253538,\n    \"stddev_ts\": 0.000025,\n    \"samples_ns\": [ 56800170736, 56798907089, 56799653245 ],\n    \"samples_ts\": [ 2.25351, 2.25356, 2.25353 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:45:10Z\",\n    \"avg_ns\": 72676396439,\n    \"stddev_ns\": 197235821,\n    \"avg_ts\": 1.761241,\n    \"stddev_ts\": 0.004772,\n    \"samples_ns\": [ 72556040361, 72569130056, 72904018902 ],\n    \"samples_ts\": [ 1.76415, 1.76384, 1.75573 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:41:23Z",
+          "avg_ns": 56799577023,
+          "stddev_ns": 678496,
+          "avg_ts": 2.253538,
+          "stddev_ts": 2.5e-05,
+          "samples_ns": [
+            56800170736,
+            56798907089,
+            56799653245
+          ],
+          "samples_ts": [
+            2.25351,
+            2.25356,
+            2.25353
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:45:10Z",
+          "avg_ns": 72676396439,
+          "stddev_ns": 197235821,
+          "avg_ts": 1.761241,
+          "stddev_ts": 0.004772,
+          "samples_ns": [
+            72556040361,
+            72569130056,
+            72904018902
+          ],
+          "samples_ts": [
+            1.76415,
+            1.76384,
+            1.75573
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1400
+    },
+    {
+      "timestamp_utc": "2025-12-14T11:07:43.712939+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:48:50Z\",\n    \"avg_ns\": 56780579737,\n    \"stddev_ns\": 2787815,\n    \"avg_ts\": 2.254292,\n    \"stddev_ts\": 0.000110,\n    \"samples_ns\": [ 56783605202, 56779957667, 56778176344 ],\n    \"samples_ts\": [ 2.25417, 2.25432, 2.25439 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T10:52:38Z\",\n    \"avg_ns\": 301670070133,\n    \"stddev_ns\": 4248960664,\n    \"avg_ts\": 1.697445,\n    \"stddev_ts\": 0.024104,\n    \"samples_ns\": [ 296763794868, 304126908532, 304119507000 ],\n    \"samples_ts\": [ 1.72528, 1.68351, 1.68355 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:48:50Z",
+          "avg_ns": 56780579737,
+          "stddev_ns": 2787815,
+          "avg_ts": 2.254292,
+          "stddev_ts": 0.00011,
+          "samples_ns": [
+            56783605202,
+            56779957667,
+            56778176344
+          ],
+          "samples_ts": [
+            2.25417,
+            2.25432,
+            2.25439
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T10:52:38Z",
+          "avg_ns": 301670070133,
+          "stddev_ns": 4248960664,
+          "avg_ts": 1.697445,
+          "stddev_ts": 0.024104,
+          "samples_ns": [
+            296763794868,
+            304126908532,
+            304119507000
+          ],
+          "samples_ts": [
+            1.72528,
+            1.68351,
+            1.68355
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1401
+    },
+    {
+      "timestamp_utc": "2025-12-14T11:26:41.546344+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:07:44Z\",\n    \"avg_ns\": 229483648501,\n    \"stddev_ns\": 6046234,\n    \"avg_ts\": 2.231096,\n    \"stddev_ts\": 0.000059,\n    \"samples_ns\": [ 229490478361, 229479075027, 229481392116 ],\n    \"samples_ts\": [ 2.23103, 2.23114, 2.23112 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:23:02Z\",\n    \"avg_ns\": 72630981511,\n    \"stddev_ns\": 50372368,\n    \"avg_ts\": 1.762334,\n    \"stddev_ts\": 0.001222,\n    \"samples_ns\": [ 72609730777, 72594716791, 72688496965 ],\n    \"samples_ts\": [ 1.76285, 1.76321, 1.76094 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:07:44Z",
+          "avg_ns": 229483648501,
+          "stddev_ns": 6046234,
+          "avg_ts": 2.231096,
+          "stddev_ts": 5.9e-05,
+          "samples_ns": [
+            229490478361,
+            229479075027,
+            229481392116
+          ],
+          "samples_ts": [
+            2.23103,
+            2.23114,
+            2.23112
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:23:02Z",
+          "avg_ns": 72630981511,
+          "stddev_ns": 50372368,
+          "avg_ts": 1.762334,
+          "stddev_ts": 0.001222,
+          "samples_ns": [
+            72609730777,
+            72594716791,
+            72688496965
+          ],
+          "samples_ts": [
+            1.76285,
+            1.76321,
+            1.76094
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1402
+    },
+    {
+      "timestamp_utc": "2025-12-14T11:57:06.266044+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "3",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:26:42Z\",\n    \"avg_ns\": 229538937365,\n    \"stddev_ns\": 4057356,\n    \"avg_ts\": 2.230558,\n    \"stddev_ts\": 0.000039,\n    \"samples_ns\": [ 229537778690, 229535614690, 229543418716 ],\n    \"samples_ts\": [ 2.23057, 2.23059, 2.23051 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 3,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:42:01Z\",\n    \"avg_ns\": 301495418992,\n    \"stddev_ns\": 1866399833,\n    \"avg_ts\": 1.698477,\n    \"stddev_ts\": 0.026620,\n    \"samples_ns\": [ 296088006349, 304188709561, 304209541067 ],\n    \"samples_ts\": [ 1.72922, 1.68317, 1.68305 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:26:42Z",
+          "avg_ns": 229538937365,
+          "stddev_ns": 4057356,
+          "avg_ts": 2.230558,
+          "stddev_ts": 3.9e-05,
+          "samples_ns": [
+            229537778690,
+            229535614690,
+            229543418716
+          ],
+          "samples_ts": [
+            2.23057,
+            2.23059,
+            2.23051
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 3,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:42:01Z",
+          "avg_ns": 301495418992,
+          "stddev_ns": 1866399833,
+          "avg_ts": 1.698477,
+          "stddev_ts": 0.02662,
+          "samples_ns": [
+            296088006349,
+            304188709561,
+            304209541067
+          ],
+          "samples_ts": [
+            1.72922,
+            1.68317,
+            1.68305
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 3,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1403
+    },
+    {
+      "timestamp_utc": "2025-12-14T12:03:28.818264+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:57:07Z\",\n    \"avg_ns\": 43024608159,\n    \"stddev_ns\": 10570457,\n    \"avg_ts\": 2.975042,\n    \"stddev_ts\": 0.000731,\n    \"samples_ns\": [ 43028602419, 43032595165, 43012626895 ],\n    \"samples_ts\": [ 2.97477, 2.97449, 2.97587 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T11:59:59Z\",\n    \"avg_ns\": 69506523327,\n    \"stddev_ns\": 90362484,\n    \"avg_ts\": 1.841556,\n    \"stddev_ts\": 0.002396,\n    \"samples_ns\": [ 69402402474, 69564452747, 69552714761 ],\n    \"samples_ts\": [ 1.84432, 1.84002, 1.84033 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:57:07Z",
+          "avg_ns": 43024608159,
+          "stddev_ns": 10570457,
+          "avg_ts": 2.975042,
+          "stddev_ts": 0.000731,
+          "samples_ns": [
+            43028602419,
+            43032595165,
+            43012626895
+          ],
+          "samples_ts": [
+            2.97477,
+            2.97449,
+            2.97587
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T11:59:59Z",
+          "avg_ns": 69506523327,
+          "stddev_ns": 90362484,
+          "avg_ts": 1.841556,
+          "stddev_ts": 0.002396,
+          "samples_ns": [
+            69402402474,
+            69564452747,
+            69552714761
+          ],
+          "samples_ts": [
+            1.84432,
+            1.84002,
+            1.84033
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1404
+    },
+    {
+      "timestamp_utc": "2025-12-14T12:20:25.871898+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:03:30Z\",\n    \"avg_ns\": 43015151620,\n    \"stddev_ns\": 7773033,\n    \"avg_ts\": 2.975696,\n    \"stddev_ts\": 0.000538,\n    \"samples_ns\": [ 43021264226, 43017783382, 43006407253 ],\n    \"samples_ts\": [ 2.97527, 2.97551, 2.9763 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:06:22Z\",\n    \"avg_ns\": 281012671982,\n    \"stddev_ns\": 387981159,\n    \"avg_ts\": 1.821984,\n    \"stddev_ts\": 0.002515,\n    \"samples_ns\": [ 280655407749, 281425399206, 280957208993 ],\n    \"samples_ts\": [ 1.8243, 1.81931, 1.82234 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:03:30Z",
+          "avg_ns": 43015151620,
+          "stddev_ns": 7773033,
+          "avg_ts": 2.975696,
+          "stddev_ts": 0.000538,
+          "samples_ns": [
+            43021264226,
+            43017783382,
+            43006407253
+          ],
+          "samples_ts": [
+            2.97527,
+            2.97551,
+            2.9763
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:06:22Z",
+          "avg_ns": 281012671982,
+          "stddev_ns": 387981159,
+          "avg_ts": 1.821984,
+          "stddev_ts": 0.002515,
+          "samples_ns": [
+            280655407749,
+            281425399206,
+            280957208993
+          ],
+          "samples_ts": [
+            1.8243,
+            1.81931,
+            1.82234
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1405
+    },
+    {
+      "timestamp_utc": "2025-12-14T12:35:26.397398+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:20:27Z\",\n    \"avg_ns\": 172576820553,\n    \"stddev_ns\": 5543443,\n    \"avg_ts\": 2.966795,\n    \"stddev_ts\": 0.000095,\n    \"samples_ns\": [ 172571131204, 172582205551, 172577124904 ],\n    \"samples_ts\": [ 2.96689, 2.9667, 2.96679 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:31:57Z\",\n    \"avg_ns\": 69381420608,\n    \"stddev_ns\": 69024869,\n    \"avg_ts\": 1.844876,\n    \"stddev_ts\": 0.001836,\n    \"samples_ns\": [ 69303343497, 69434330216, 69406588111 ],\n    \"samples_ts\": [ 1.84695, 1.84347, 1.84421 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:20:27Z",
+          "avg_ns": 172576820553,
+          "stddev_ns": 5543443,
+          "avg_ts": 2.966795,
+          "stddev_ts": 9.5e-05,
+          "samples_ns": [
+            172571131204,
+            172582205551,
+            172577124904
+          ],
+          "samples_ts": [
+            2.96689,
+            2.9667,
+            2.96679
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:31:57Z",
+          "avg_ns": 69381420608,
+          "stddev_ns": 69024869,
+          "avg_ts": 1.844876,
+          "stddev_ts": 0.001836,
+          "samples_ns": [
+            69303343497,
+            69434330216,
+            69406588111
+          ],
+          "samples_ts": [
+            1.84695,
+            1.84347,
+            1.84421
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1406
+    },
+    {
+      "timestamp_utc": "2025-12-14T13:01:01.975684+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:35:27Z\",\n    \"avg_ns\": 172586715089,\n    \"stddev_ns\": 24010304,\n    \"avg_ts\": 2.966625,\n    \"stddev_ts\": 0.000413,\n    \"samples_ns\": [ 172578611278, 172567809026, 172613724964 ],\n    \"samples_ts\": [ 2.96676, 2.96695, 2.96616 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T12:46:58Z\",\n    \"avg_ns\": 281075401937,\n    \"stddev_ns\": 206927146,\n    \"avg_ts\": 1.821576,\n    \"stddev_ts\": 0.001341,\n    \"samples_ns\": [ 281246868151, 281133781102, 280845556558 ],\n    \"samples_ts\": [ 1.82046, 1.8212, 1.82307 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:35:27Z",
+          "avg_ns": 172586715089,
+          "stddev_ns": 24010304,
+          "avg_ts": 2.966625,
+          "stddev_ts": 0.000413,
+          "samples_ns": [
+            172578611278,
+            172567809026,
+            172613724964
+          ],
+          "samples_ts": [
+            2.96676,
+            2.96695,
+            2.96616
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T12:46:58Z",
+          "avg_ns": 281075401937,
+          "stddev_ns": 206927146,
+          "avg_ts": 1.821576,
+          "stddev_ts": 0.001341,
+          "samples_ns": [
+            281246868151,
+            281133781102,
+            280845556558
+          ],
+          "samples_ts": [
+            1.82046,
+            1.8212,
+            1.82307
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1407
+    },
+    {
+      "timestamp_utc": "2025-12-14T13:07:24.669640+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:01:03Z\",\n    \"avg_ns\": 43013159669,\n    \"stddev_ns\": 3734203,\n    \"avg_ts\": 2.975833,\n    \"stddev_ts\": 0.000258,\n    \"samples_ns\": [ 43012258912, 43017250066, 43009970031 ],\n    \"samples_ts\": [ 2.9759, 2.97555, 2.97605 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:03:55Z\",\n    \"avg_ns\": 69546374160,\n    \"stddev_ns\": 46103625,\n    \"avg_ts\": 1.840499,\n    \"stddev_ts\": 0.001220,\n    \"samples_ns\": [ 69531389167, 69509628748, 69598104567 ],\n    \"samples_ts\": [ 1.8409, 1.84147, 1.83913 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:01:03Z",
+          "avg_ns": 43013159669,
+          "stddev_ns": 3734203,
+          "avg_ts": 2.975833,
+          "stddev_ts": 0.000258,
+          "samples_ns": [
+            43012258912,
+            43017250066,
+            43009970031
+          ],
+          "samples_ts": [
+            2.9759,
+            2.97555,
+            2.97605
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:03:55Z",
+          "avg_ns": 69546374160,
+          "stddev_ns": 46103625,
+          "avg_ts": 1.840499,
+          "stddev_ts": 0.00122,
+          "samples_ns": [
+            69531389167,
+            69509628748,
+            69598104567
+          ],
+          "samples_ts": [
+            1.8409,
+            1.84147,
+            1.83913
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1408
+    },
+    {
+      "timestamp_utc": "2025-12-14T13:24:21.874648+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:07:25Z\",\n    \"avg_ns\": 43022002638,\n    \"stddev_ns\": 5667767,\n    \"avg_ts\": 2.975222,\n    \"stddev_ts\": 0.000392,\n    \"samples_ns\": [ 43017096216, 43020705001, 43028206697 ],\n    \"samples_ts\": [ 2.97556, 2.97531, 2.97479 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:10:18Z\",\n    \"avg_ns\": 281049500647,\n    \"stddev_ns\": 3121251656,\n    \"avg_ts\": 1.821751,\n    \"stddev_ts\": 0.004676,\n    \"samples_ns\": [ 280219420270, 281509980836, 281419100835 ],\n    \"samples_ts\": [ 1.82714, 1.81876, 1.81935 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:07:25Z",
+          "avg_ns": 43022002638,
+          "stddev_ns": 5667767,
+          "avg_ts": 2.975222,
+          "stddev_ts": 0.000392,
+          "samples_ns": [
+            43017096216,
+            43020705001,
+            43028206697
+          ],
+          "samples_ts": [
+            2.97556,
+            2.97531,
+            2.97479
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:10:18Z",
+          "avg_ns": 281049500647,
+          "stddev_ns": 3121251656,
+          "avg_ts": 1.821751,
+          "stddev_ts": 0.004676,
+          "samples_ns": [
+            280219420270,
+            281509980836,
+            281419100835
+          ],
+          "samples_ts": [
+            1.82714,
+            1.81876,
+            1.81935
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1409
+    },
+    {
+      "timestamp_utc": "2025-12-14T13:39:24.007542+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:24:23Z\",\n    \"avg_ns\": 173022288825,\n    \"stddev_ns\": 19871589,\n    \"avg_ts\": 2.959156,\n    \"stddev_ts\": 0.000340,\n    \"samples_ns\": [ 173042950511, 173003315000, 173020600964 ],\n    \"samples_ts\": [ 2.9588, 2.95948, 2.95919 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:35:55Z\",\n    \"avg_ns\": 69364412357,\n    \"stddev_ns\": 252618351,\n    \"avg_ts\": 1.845343,\n    \"stddev_ts\": 0.006735,\n    \"samples_ns\": [ 69072714442, 69509735550, 69510787079 ],\n    \"samples_ts\": [ 1.85312, 1.84147, 1.84144 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:24:23Z",
+          "avg_ns": 173022288825,
+          "stddev_ns": 19871589,
+          "avg_ts": 2.959156,
+          "stddev_ts": 0.00034,
+          "samples_ns": [
+            173042950511,
+            173003315000,
+            173020600964
+          ],
+          "samples_ts": [
+            2.9588,
+            2.95948,
+            2.95919
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:35:55Z",
+          "avg_ns": 69364412357,
+          "stddev_ns": 252618351,
+          "avg_ts": 1.845343,
+          "stddev_ts": 0.006735,
+          "samples_ns": [
+            69072714442,
+            69509735550,
+            69510787079
+          ],
+          "samples_ts": [
+            1.85312,
+            1.84147,
+            1.84144
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1410
+    },
+    {
+      "timestamp_utc": "2025-12-14T14:05:03.714798+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:39:25Z\",\n    \"avg_ns\": 173003453379,\n    \"stddev_ns\": 24161003,\n    \"avg_ts\": 2.959479,\n    \"stddev_ts\": 0.000413,\n    \"samples_ns\": [ 172977191345, 173008451917, 173024716877 ],\n    \"samples_ts\": [ 2.95993, 2.95939, 2.95911 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T13:50:57Z\",\n    \"avg_ns\": 281914588972,\n    \"stddev_ns\": 72551000,\n    \"avg_ts\": 1.816153,\n    \"stddev_ts\": 0.000467,\n    \"samples_ns\": [ 281901059947, 281849754799, 281992952170 ],\n    \"samples_ts\": [ 1.81624, 1.81657, 1.81565 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:39:25Z",
+          "avg_ns": 173003453379,
+          "stddev_ns": 24161003,
+          "avg_ts": 2.959479,
+          "stddev_ts": 0.000413,
+          "samples_ns": [
+            172977191345,
+            173008451917,
+            173024716877
+          ],
+          "samples_ts": [
+            2.95993,
+            2.95939,
+            2.95911
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T13:50:57Z",
+          "avg_ns": 281914588972,
+          "stddev_ns": 72551000,
+          "avg_ts": 1.816153,
+          "stddev_ts": 0.000467,
+          "samples_ns": [
+            281901059947,
+            281849754799,
+            281992952170
+          ],
+          "samples_ts": [
+            1.81624,
+            1.81657,
+            1.81565
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1411
+    },
+    {
+      "timestamp_utc": "2025-12-14T14:11:26.074459+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:05:04Z\",\n    \"avg_ns\": 43016346330,\n    \"stddev_ns\": 6471825,\n    \"avg_ts\": 2.975613,\n    \"stddev_ts\": 0.000448,\n    \"samples_ns\": [ 43022392000, 43009519296, 43017127694 ],\n    \"samples_ts\": [ 2.97519, 2.97609, 2.97556 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:07:57Z\",\n    \"avg_ns\": 69445493644,\n    \"stddev_ns\": 185478138,\n    \"avg_ts\": 1.843181,\n    \"stddev_ts\": 0.004930,\n    \"samples_ns\": [ 69233670483, 69578797761, 69524012688 ],\n    \"samples_ts\": [ 1.84881, 1.83964, 1.84109 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:05:04Z",
+          "avg_ns": 43016346330,
+          "stddev_ns": 6471825,
+          "avg_ts": 2.975613,
+          "stddev_ts": 0.000448,
+          "samples_ns": [
+            43022392000,
+            43009519296,
+            43017127694
+          ],
+          "samples_ts": [
+            2.97519,
+            2.97609,
+            2.97556
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:07:57Z",
+          "avg_ns": 69445493644,
+          "stddev_ns": 185478138,
+          "avg_ts": 1.843181,
+          "stddev_ts": 0.00493,
+          "samples_ns": [
+            69233670483,
+            69578797761,
+            69524012688
+          ],
+          "samples_ts": [
+            1.84881,
+            1.83964,
+            1.84109
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1412
+    },
+    {
+      "timestamp_utc": "2025-12-14T14:28:24.980753+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:11:27Z\",\n    \"avg_ns\": 43019824853,\n    \"stddev_ns\": 7472925,\n    \"avg_ts\": 2.975372,\n    \"stddev_ts\": 0.000517,\n    \"samples_ns\": [ 43027428940, 43012496482, 43019549138 ],\n    \"samples_ts\": [ 2.97485, 2.97588, 2.97539 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:14:19Z\",\n    \"avg_ns\": 281614302513,\n    \"stddev_ns\": 702395411,\n    \"avg_ts\": 1.818097,\n    \"stddev_ts\": 0.004541,\n    \"samples_ns\": [ 280807321259, 282088113652, 281947472629 ],\n    \"samples_ts\": [ 1.82331, 1.81504, 1.81594 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:11:27Z",
+          "avg_ns": 43019824853,
+          "stddev_ns": 7472925,
+          "avg_ts": 2.975372,
+          "stddev_ts": 0.000517,
+          "samples_ns": [
+            43027428940,
+            43012496482,
+            43019549138
+          ],
+          "samples_ts": [
+            2.97485,
+            2.97588,
+            2.97539
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:14:19Z",
+          "avg_ns": 281614302513,
+          "stddev_ns": 702395411,
+          "avg_ts": 1.818097,
+          "stddev_ts": 0.004541,
+          "samples_ns": [
+            280807321259,
+            282088113652,
+            281947472629
+          ],
+          "samples_ts": [
+            1.82331,
+            1.81504,
+            1.81594
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1413
+    },
+    {
+      "timestamp_utc": "2025-12-14T14:43:30.872295+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:28:26Z\",\n    \"avg_ns\": 173892861109,\n    \"stddev_ns\": 7892556,\n    \"avg_ts\": 2.944342,\n    \"stddev_ts\": 0.000133,\n    \"samples_ns\": [ 173899056285, 173884005092, 173895521952 ],\n    \"samples_ts\": [ 2.94424, 2.94449, 2.9443 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:40:01Z\",\n    \"avg_ns\": 69445336563,\n    \"stddev_ns\": 182240676,\n    \"avg_ts\": 1.843185,\n    \"stddev_ts\": 0.004844,\n    \"samples_ns\": [ 69234930273, 69547626068, 69553453349 ],\n    \"samples_ts\": [ 1.84878, 1.84047, 1.84031 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:28:26Z",
+          "avg_ns": 173892861109,
+          "stddev_ns": 7892556,
+          "avg_ts": 2.944342,
+          "stddev_ts": 0.000133,
+          "samples_ns": [
+            173899056285,
+            173884005092,
+            173895521952
+          ],
+          "samples_ts": [
+            2.94424,
+            2.94449,
+            2.9443
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:40:01Z",
+          "avg_ns": 69445336563,
+          "stddev_ns": 182240676,
+          "avg_ts": 1.843185,
+          "stddev_ts": 0.004844,
+          "samples_ns": [
+            69234930273,
+            69547626068,
+            69553453349
+          ],
+          "samples_ts": [
+            1.84878,
+            1.84047,
+            1.84031
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1414
+    },
+    {
+      "timestamp_utc": "2025-12-14T15:09:08.377401+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "512",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:43:32Z\",\n    \"avg_ns\": 173871589420,\n    \"stddev_ns\": 8646544,\n    \"avg_ts\": 2.944702,\n    \"stddev_ts\": 0.000146,\n    \"samples_ns\": [ 173862316277, 173873021409, 173879430574 ],\n    \"samples_ts\": [ 2.94486, 2.94468, 2.94457 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 512,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T14:55:07Z\",\n    \"avg_ns\": 280013328585,\n    \"stddev_ns\": 3950323435,\n    \"avg_ts\": 1.828584,\n    \"stddev_ts\": 0.016583,\n    \"samples_ns\": [ 281447833125, 281495696844, 277096455787 ],\n    \"samples_ts\": [ 1.81916, 1.81886, 1.84773 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:43:32Z",
+          "avg_ns": 173871589420,
+          "stddev_ns": 8646544,
+          "avg_ts": 2.944702,
+          "stddev_ts": 0.000146,
+          "samples_ns": [
+            173862316277,
+            173873021409,
+            173879430574
+          ],
+          "samples_ts": [
+            2.94486,
+            2.94468,
+            2.94457
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 512,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T14:55:07Z",
+          "avg_ns": 280013328585,
+          "stddev_ns": 3950323435,
+          "avg_ts": 1.828584,
+          "stddev_ts": 0.016583,
+          "samples_ns": [
+            281447833125,
+            281495696844,
+            277096455787
+          ],
+          "samples_ts": [
+            1.81916,
+            1.81886,
+            1.84773
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 512,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1415
+    },
+    {
+      "timestamp_utc": "2025-12-14T15:15:30.311954+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:09:09Z\",\n    \"avg_ns\": 43014313938,\n    \"stddev_ns\": 10929938,\n    \"avg_ts\": 2.975754,\n    \"stddev_ts\": 0.000756,\n    \"samples_ns\": [ 43022555057, 43018471454, 43001915303 ],\n    \"samples_ts\": [ 2.97518, 2.97547, 2.97661 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:12:01Z\",\n    \"avg_ns\": 69303662720,\n    \"stddev_ns\": 284406908,\n    \"avg_ts\": 1.846965,\n    \"stddev_ts\": 0.007598,\n    \"samples_ns\": [ 68975261448, 69469129979, 69466596735 ],\n    \"samples_ts\": [ 1.85574, 1.84255, 1.84261 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:09:09Z",
+          "avg_ns": 43014313938,
+          "stddev_ns": 10929938,
+          "avg_ts": 2.975754,
+          "stddev_ts": 0.000756,
+          "samples_ns": [
+            43022555057,
+            43018471454,
+            43001915303
+          ],
+          "samples_ts": [
+            2.97518,
+            2.97547,
+            2.97661
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:12:01Z",
+          "avg_ns": 69303662720,
+          "stddev_ns": 284406908,
+          "avg_ts": 1.846965,
+          "stddev_ts": 0.007598,
+          "samples_ns": [
+            68975261448,
+            69469129979,
+            69466596735
+          ],
+          "samples_ts": [
+            1.85574,
+            1.84255,
+            1.84261
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1416
+    },
+    {
+      "timestamp_utc": "2025-12-14T15:32:27.252375+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:15:31Z\",\n    \"avg_ns\": 43034353794,\n    \"stddev_ns\": 5416943,\n    \"avg_ts\": 2.974368,\n    \"stddev_ts\": 0.000374,\n    \"samples_ns\": [ 43028288757, 43036077982, 43038694644 ],\n    \"samples_ts\": [ 2.97479, 2.97425, 2.97407 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:18:23Z\",\n    \"avg_ns\": 280946536035,\n    \"stddev_ns\": 752205194,\n    \"avg_ts\": 1.822420,\n    \"stddev_ts\": 0.004887,\n    \"samples_ns\": [ 281329045325, 280079946187, 281430616593 ],\n    \"samples_ts\": [ 1.81993, 1.82805, 1.81928 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:15:31Z",
+          "avg_ns": 43034353794,
+          "stddev_ns": 5416943,
+          "avg_ts": 2.974368,
+          "stddev_ts": 0.000374,
+          "samples_ns": [
+            43028288757,
+            43036077982,
+            43038694644
+          ],
+          "samples_ts": [
+            2.97479,
+            2.97425,
+            2.97407
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:18:23Z",
+          "avg_ns": 280946536035,
+          "stddev_ns": 752205194,
+          "avg_ts": 1.82242,
+          "stddev_ts": 0.004887,
+          "samples_ns": [
+            281329045325,
+            280079946187,
+            281430616593
+          ],
+          "samples_ts": [
+            1.81993,
+            1.82805,
+            1.81928
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1417
+    },
+    {
+      "timestamp_utc": "2025-12-14T15:47:27.014290+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:32:28Z\",\n    \"avg_ns\": 172596303630,\n    \"stddev_ns\": 25959598,\n    \"avg_ts\": 2.966460,\n    \"stddev_ts\": 0.000446,\n    \"samples_ns\": [ 172620041622, 172568588217, 172600281052 ],\n    \"samples_ts\": [ 2.96605, 2.96694, 2.96639 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:43:58Z\",\n    \"avg_ns\": 69124055789,\n    \"stddev_ns\": 510076555,\n    \"avg_ts\": 1.851811,\n    \"stddev_ts\": 0.013722,\n    \"samples_ns\": [ 68536366275, 69451720645, 69384080448 ],\n    \"samples_ts\": [ 1.86762, 1.84301, 1.8448 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:32:28Z",
+          "avg_ns": 172596303630,
+          "stddev_ns": 25959598,
+          "avg_ts": 2.96646,
+          "stddev_ts": 0.000446,
+          "samples_ns": [
+            172620041622,
+            172568588217,
+            172600281052
+          ],
+          "samples_ts": [
+            2.96605,
+            2.96694,
+            2.96639
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:43:58Z",
+          "avg_ns": 69124055789,
+          "stddev_ns": 510076555,
+          "avg_ts": 1.851811,
+          "stddev_ts": 0.013722,
+          "samples_ns": [
+            68536366275,
+            69451720645,
+            69384080448
+          ],
+          "samples_ts": [
+            1.86762,
+            1.84301,
+            1.8448
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1418
+    },
+    {
+      "timestamp_utc": "2025-12-14T16:13:04.157264+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:47:28Z\",\n    \"avg_ns\": 172548527994,\n    \"stddev_ns\": 26302787,\n    \"avg_ts\": 2.967281,\n    \"stddev_ts\": 0.000452,\n    \"samples_ns\": [ 172535174400, 172531588004, 172578821580 ],\n    \"samples_ts\": [ 2.96751, 2.96757, 2.96676 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T15:58:58Z\",\n    \"avg_ns\": 281626143894,\n    \"stddev_ns\": 238034164,\n    \"avg_ts\": 1.818014,\n    \"stddev_ts\": 0.001536,\n    \"samples_ns\": [ 281418221386, 281574424589, 281885785707 ],\n    \"samples_ts\": [ 1.81936, 1.81835, 1.81634 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:47:28Z",
+          "avg_ns": 172548527994,
+          "stddev_ns": 26302787,
+          "avg_ts": 2.967281,
+          "stddev_ts": 0.000452,
+          "samples_ns": [
+            172535174400,
+            172531588004,
+            172578821580
+          ],
+          "samples_ts": [
+            2.96751,
+            2.96757,
+            2.96676
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T15:58:58Z",
+          "avg_ns": 281626143894,
+          "stddev_ns": 238034164,
+          "avg_ts": 1.818014,
+          "stddev_ts": 0.001536,
+          "samples_ns": [
+            281418221386,
+            281574424589,
+            281885785707
+          ],
+          "samples_ts": [
+            1.81936,
+            1.81835,
+            1.81634
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1419
+    },
+    {
+      "timestamp_utc": "2025-12-14T16:19:26.713625+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:13:05Z\",\n    \"avg_ns\": 43028082545,\n    \"stddev_ns\": 5458745,\n    \"avg_ts\": 2.974801,\n    \"stddev_ts\": 0.000377,\n    \"samples_ns\": [ 43027087322, 43033966438, 43023193876 ],\n    \"samples_ts\": [ 2.97487, 2.97439, 2.97514 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:15:57Z\",\n    \"avg_ns\": 69488701787,\n    \"stddev_ns\": 36720464,\n    \"avg_ts\": 1.842026,\n    \"stddev_ts\": 0.000974,\n    \"samples_ns\": [ 69449068577, 69495471191, 69521565594 ],\n    \"samples_ts\": [ 1.84308, 1.84185, 1.84116 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:13:05Z",
+          "avg_ns": 43028082545,
+          "stddev_ns": 5458745,
+          "avg_ts": 2.974801,
+          "stddev_ts": 0.000377,
+          "samples_ns": [
+            43027087322,
+            43033966438,
+            43023193876
+          ],
+          "samples_ts": [
+            2.97487,
+            2.97439,
+            2.97514
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:15:57Z",
+          "avg_ns": 69488701787,
+          "stddev_ns": 36720464,
+          "avg_ts": 1.842026,
+          "stddev_ts": 0.000974,
+          "samples_ns": [
+            69449068577,
+            69495471191,
+            69521565594
+          ],
+          "samples_ts": [
+            1.84308,
+            1.84185,
+            1.84116
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1420
+    },
+    {
+      "timestamp_utc": "2025-12-14T16:36:24.695952+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:19:27Z\",\n    \"avg_ns\": 43025487798,\n    \"stddev_ns\": 7278431,\n    \"avg_ts\": 2.974981,\n    \"stddev_ts\": 0.000503,\n    \"samples_ns\": [ 43033436492, 43023877612, 43019149290 ],\n    \"samples_ts\": [ 2.97443, 2.97509, 2.97542 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:22:20Z\",\n    \"avg_ns\": 281301883171,\n    \"stddev_ns\": 349714730,\n    \"avg_ts\": 1.820111,\n    \"stddev_ts\": 0.002264,\n    \"samples_ns\": [ 280898070995, 281505169775, 281502408744 ],\n    \"samples_ts\": [ 1.82273, 1.81879, 1.81881 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:19:27Z",
+          "avg_ns": 43025487798,
+          "stddev_ns": 7278431,
+          "avg_ts": 2.974981,
+          "stddev_ts": 0.000503,
+          "samples_ns": [
+            43033436492,
+            43023877612,
+            43019149290
+          ],
+          "samples_ts": [
+            2.97443,
+            2.97509,
+            2.97542
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:22:20Z",
+          "avg_ns": 281301883171,
+          "stddev_ns": 349714730,
+          "avg_ts": 1.820111,
+          "stddev_ts": 0.002264,
+          "samples_ns": [
+            280898070995,
+            281505169775,
+            281502408744
+          ],
+          "samples_ts": [
+            1.82273,
+            1.81879,
+            1.81881
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1421
+    },
+    {
+      "timestamp_utc": "2025-12-14T16:51:27.799789+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:36:25Z\",\n    \"avg_ns\": 173009486359,\n    \"stddev_ns\": 5214657,\n    \"avg_ts\": 2.959375,\n    \"stddev_ts\": 0.000089,\n    \"samples_ns\": [ 173011367536, 173013481916, 173003609626 ],\n    \"samples_ts\": [ 2.95934, 2.95931, 2.95948 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:47:58Z\",\n    \"avg_ns\": 69695357260,\n    \"stddev_ns\": 293044311,\n    \"avg_ts\": 1.836586,\n    \"stddev_ts\": 0.007704,\n    \"samples_ns\": [ 70033177711, 69509627227, 69543266843 ],\n    \"samples_ts\": [ 1.82771, 1.84147, 1.84058 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:36:25Z",
+          "avg_ns": 173009486359,
+          "stddev_ns": 5214657,
+          "avg_ts": 2.959375,
+          "stddev_ts": 8.9e-05,
+          "samples_ns": [
+            173011367536,
+            173013481916,
+            173003609626
+          ],
+          "samples_ts": [
+            2.95934,
+            2.95931,
+            2.95948
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:47:58Z",
+          "avg_ns": 69695357260,
+          "stddev_ns": 293044311,
+          "avg_ts": 1.836586,
+          "stddev_ts": 0.007704,
+          "samples_ns": [
+            70033177711,
+            69509627227,
+            69543266843
+          ],
+          "samples_ts": [
+            1.82771,
+            1.84147,
+            1.84058
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1422
+    },
+    {
+      "timestamp_utc": "2025-12-14T17:17:05.801672+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T16:51:29Z\",\n    \"avg_ns\": 172998261815,\n    \"stddev_ns\": 6033422,\n    \"avg_ts\": 2.959567,\n    \"stddev_ts\": 0.000103,\n    \"samples_ns\": [ 172993922557, 172995748037, 173005114853 ],\n    \"samples_ts\": [ 2.95964, 2.95961, 2.95945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:03:01Z\",\n    \"avg_ns\": 281346757824,\n    \"stddev_ns\": 77107202,\n    \"avg_ts\": 1.819818,\n    \"stddev_ts\": 0.000499,\n    \"samples_ns\": [ 281260693306, 281409529182, 281370050986 ],\n    \"samples_ts\": [ 1.82038, 1.81941, 1.81967 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T16:51:29Z",
+          "avg_ns": 172998261815,
+          "stddev_ns": 6033422,
+          "avg_ts": 2.959567,
+          "stddev_ts": 0.000103,
+          "samples_ns": [
+            172993922557,
+            172995748037,
+            173005114853
+          ],
+          "samples_ts": [
+            2.95964,
+            2.95961,
+            2.95945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:03:01Z",
+          "avg_ns": 281346757824,
+          "stddev_ns": 77107202,
+          "avg_ts": 1.819818,
+          "stddev_ts": 0.000499,
+          "samples_ns": [
+            281260693306,
+            281409529182,
+            281370050986
+          ],
+          "samples_ts": [
+            1.82038,
+            1.81941,
+            1.81967
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1423
+    },
+    {
+      "timestamp_utc": "2025-12-14T17:23:28.482951+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:17:07Z\",\n    \"avg_ns\": 43025780336,\n    \"stddev_ns\": 4350834,\n    \"avg_ts\": 2.974961,\n    \"stddev_ts\": 0.000301,\n    \"samples_ns\": [ 43023373342, 43030802805, 43023164861 ],\n    \"samples_ts\": [ 2.97513, 2.97461, 2.97514 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:19:59Z\",\n    \"avg_ns\": 69536699245,\n    \"stddev_ns\": 43580755,\n    \"avg_ts\": 1.840755,\n    \"stddev_ts\": 0.001154,\n    \"samples_ns\": [ 69486450954, 69559483505, 69564163278 ],\n    \"samples_ts\": [ 1.84209, 1.84015, 1.84003 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:17:07Z",
+          "avg_ns": 43025780336,
+          "stddev_ns": 4350834,
+          "avg_ts": 2.974961,
+          "stddev_ts": 0.000301,
+          "samples_ns": [
+            43023373342,
+            43030802805,
+            43023164861
+          ],
+          "samples_ts": [
+            2.97513,
+            2.97461,
+            2.97514
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:19:59Z",
+          "avg_ns": 69536699245,
+          "stddev_ns": 43580755,
+          "avg_ts": 1.840755,
+          "stddev_ts": 0.001154,
+          "samples_ns": [
+            69486450954,
+            69559483505,
+            69564163278
+          ],
+          "samples_ts": [
+            1.84209,
+            1.84015,
+            1.84003
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1424
+    },
+    {
+      "timestamp_utc": "2025-12-14T17:40:26.356285+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:23:29Z\",\n    \"avg_ns\": 43016991103,\n    \"stddev_ns\": 2744313,\n    \"avg_ts\": 2.975568,\n    \"stddev_ts\": 0.000190,\n    \"samples_ns\": [ 43014988864, 43020119318, 43015865127 ],\n    \"samples_ts\": [ 2.97571, 2.97535, 2.97565 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:26:21Z\",\n    \"avg_ns\": 281273087729,\n    \"stddev_ns\": 324452585,\n    \"avg_ts\": 1.820297,\n    \"stddev_ts\": 0.002101,\n    \"samples_ns\": [ 280904920766, 281517248625, 281397093796 ],\n    \"samples_ts\": [ 1.82268, 1.81872, 1.81949 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:23:29Z",
+          "avg_ns": 43016991103,
+          "stddev_ns": 2744313,
+          "avg_ts": 2.975568,
+          "stddev_ts": 0.00019,
+          "samples_ns": [
+            43014988864,
+            43020119318,
+            43015865127
+          ],
+          "samples_ts": [
+            2.97571,
+            2.97535,
+            2.97565
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:26:21Z",
+          "avg_ns": 281273087729,
+          "stddev_ns": 324452585,
+          "avg_ts": 1.820297,
+          "stddev_ts": 0.002101,
+          "samples_ns": [
+            280904920766,
+            281517248625,
+            281397093796
+          ],
+          "samples_ts": [
+            1.82268,
+            1.81872,
+            1.81949
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1425
+    },
+    {
+      "timestamp_utc": "2025-12-14T17:55:32.663584+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:40:27Z\",\n    \"avg_ns\": 173922037528,\n    \"stddev_ns\": 42306682,\n    \"avg_ts\": 2.943848,\n    \"stddev_ts\": 0.000716,\n    \"samples_ns\": [ 173873188174, 173946863237, 173946061173 ],\n    \"samples_ts\": [ 2.94467, 2.94343, 2.94344 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:52:03Z\",\n    \"avg_ns\": 69556953017,\n    \"stddev_ns\": 83929688,\n    \"avg_ts\": 1.840220,\n    \"stddev_ts\": 0.002222,\n    \"samples_ns\": [ 69460039897, 69605317938, 69605501217 ],\n    \"samples_ts\": [ 1.84279, 1.83894, 1.83894 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:40:27Z",
+          "avg_ns": 173922037528,
+          "stddev_ns": 42306682,
+          "avg_ts": 2.943848,
+          "stddev_ts": 0.000716,
+          "samples_ns": [
+            173873188174,
+            173946863237,
+            173946061173
+          ],
+          "samples_ts": [
+            2.94467,
+            2.94343,
+            2.94344
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:52:03Z",
+          "avg_ns": 69556953017,
+          "stddev_ns": 83929688,
+          "avg_ts": 1.84022,
+          "stddev_ts": 0.002222,
+          "samples_ns": [
+            69460039897,
+            69605317938,
+            69605501217
+          ],
+          "samples_ts": [
+            1.84279,
+            1.83894,
+            1.83894
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1426
+    },
+    {
+      "timestamp_utc": "2025-12-14T18:21:15.003312+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "1024",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T17:55:33Z\",\n    \"avg_ns\": 173961452828,\n    \"stddev_ns\": 8633254,\n    \"avg_ts\": 2.943181,\n    \"stddev_ts\": 0.000146,\n    \"samples_ns\": [ 173955036815, 173958053326, 173971268343 ],\n    \"samples_ts\": [ 2.94329, 2.94324, 2.94301 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 1024,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:07:09Z\",\n    \"avg_ns\": 281493039118,\n    \"stddev_ns\": 153790575,\n    \"avg_ts\": 1.818873,\n    \"stddev_ts\": 0.000994,\n    \"samples_ns\": [ 281317245918, 281559161780, 281602709656 ],\n    \"samples_ts\": [ 1.82001, 1.81845, 1.81816 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T17:55:33Z",
+          "avg_ns": 173961452828,
+          "stddev_ns": 8633254,
+          "avg_ts": 2.943181,
+          "stddev_ts": 0.000146,
+          "samples_ns": [
+            173955036815,
+            173958053326,
+            173971268343
+          ],
+          "samples_ts": [
+            2.94329,
+            2.94324,
+            2.94301
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 1024,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:07:09Z",
+          "avg_ns": 281493039118,
+          "stddev_ns": 153790575,
+          "avg_ts": 1.818873,
+          "stddev_ts": 0.000994,
+          "samples_ns": [
+            281317245918,
+            281559161780,
+            281602709656
+          ],
+          "samples_ts": [
+            1.82001,
+            1.81845,
+            1.81816
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 1024,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1427
+    },
+    {
+      "timestamp_utc": "2025-12-14T18:27:37.866307+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:21:16Z\",\n    \"avg_ns\": 43036786548,\n    \"stddev_ns\": 12665538,\n    \"avg_ts\": 2.974200,\n    \"stddev_ts\": 0.000875,\n    \"samples_ns\": [ 43045961863, 43042059572, 43022338210 ],\n    \"samples_ts\": [ 2.97357, 2.97384, 2.9752 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:24:08Z\",\n    \"avg_ns\": 69588553300,\n    \"stddev_ns\": 90738613,\n    \"avg_ts\": 1.839385,\n    \"stddev_ts\": 0.002400,\n    \"samples_ns\": [ 69483851872, 69637503301, 69644304729 ],\n    \"samples_ts\": [ 1.84215, 1.83809, 1.83791 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:21:16Z",
+          "avg_ns": 43036786548,
+          "stddev_ns": 12665538,
+          "avg_ts": 2.9742,
+          "stddev_ts": 0.000875,
+          "samples_ns": [
+            43045961863,
+            43042059572,
+            43022338210
+          ],
+          "samples_ts": [
+            2.97357,
+            2.97384,
+            2.9752
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:24:08Z",
+          "avg_ns": 69588553300,
+          "stddev_ns": 90738613,
+          "avg_ts": 1.839385,
+          "stddev_ts": 0.0024,
+          "samples_ns": [
+            69483851872,
+            69637503301,
+            69644304729
+          ],
+          "samples_ts": [
+            1.84215,
+            1.83809,
+            1.83791
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1428
+    },
+    {
+      "timestamp_utc": "2025-12-14T18:44:34.807625+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:27:39Z\",\n    \"avg_ns\": 43030881310,\n    \"stddev_ns\": 4773789,\n    \"avg_ts\": 2.974608,\n    \"stddev_ts\": 0.000330,\n    \"samples_ns\": [ 43030978451, 43035605788, 43026059691 ],\n    \"samples_ts\": [ 2.9746, 2.97428, 2.97494 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:30:31Z\",\n    \"avg_ns\": 280954900498,\n    \"stddev_ns\": 1100468093,\n    \"avg_ts\": 1.822375,\n    \"stddev_ts\": 0.007154,\n    \"samples_ns\": [ 279685641516, 281536940176, 281642119804 ],\n    \"samples_ts\": [ 1.83063, 1.81859, 1.81791 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:27:39Z",
+          "avg_ns": 43030881310,
+          "stddev_ns": 4773789,
+          "avg_ts": 2.974608,
+          "stddev_ts": 0.00033,
+          "samples_ns": [
+            43030978451,
+            43035605788,
+            43026059691
+          ],
+          "samples_ts": [
+            2.9746,
+            2.97428,
+            2.97494
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:30:31Z",
+          "avg_ns": 280954900498,
+          "stddev_ns": 1100468093,
+          "avg_ts": 1.822375,
+          "stddev_ts": 0.007154,
+          "samples_ns": [
+            279685641516,
+            281536940176,
+            281642119804
+          ],
+          "samples_ts": [
+            1.83063,
+            1.81859,
+            1.81791
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1429
+    },
+    {
+      "timestamp_utc": "2025-12-14T18:59:35.431539+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:44:36Z\",\n    \"avg_ns\": 172586556361,\n    \"stddev_ns\": 8078185,\n    \"avg_ts\": 2.966627,\n    \"stddev_ts\": 0.000139,\n    \"samples_ns\": [ 172577298158, 172590291733, 172592079193 ],\n    \"samples_ts\": [ 2.96679, 2.96656, 2.96653 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:56:06Z\",\n    \"avg_ns\": 69440651994,\n    \"stddev_ns\": 14006777,\n    \"avg_ts\": 1.843301,\n    \"stddev_ts\": 0.000372,\n    \"samples_ns\": [ 69426611101, 69454614409, 69440730474 ],\n    \"samples_ts\": [ 1.84367, 1.84293, 1.8433 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:44:36Z",
+          "avg_ns": 172586556361,
+          "stddev_ns": 8078185,
+          "avg_ts": 2.966627,
+          "stddev_ts": 0.000139,
+          "samples_ns": [
+            172577298158,
+            172590291733,
+            172592079193
+          ],
+          "samples_ts": [
+            2.96679,
+            2.96656,
+            2.96653
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:56:06Z",
+          "avg_ns": 69440651994,
+          "stddev_ns": 14006777,
+          "avg_ts": 1.843301,
+          "stddev_ts": 0.000372,
+          "samples_ns": [
+            69426611101,
+            69454614409,
+            69440730474
+          ],
+          "samples_ts": [
+            1.84367,
+            1.84293,
+            1.8433
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1430
+    },
+    {
+      "timestamp_utc": "2025-12-14T19:25:13.056788+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "128",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T18:59:36Z\",\n    \"avg_ns\": 172650758755,\n    \"stddev_ns\": 9942946,\n    \"avg_ts\": 2.965524,\n    \"stddev_ts\": 0.000170,\n    \"samples_ns\": [ 172656813127, 172656159222, 172639303918 ],\n    \"samples_ts\": [ 2.96542, 2.96543, 2.96572 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 128,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:11:07Z\",\n    \"avg_ns\": 281670650921,\n    \"stddev_ns\": 72882510,\n    \"avg_ts\": 1.817726,\n    \"stddev_ts\": 0.000470,\n    \"samples_ns\": [ 281734945338, 281591479787, 281685527639 ],\n    \"samples_ts\": [ 1.81731, 1.81824, 1.81763 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T18:59:36Z",
+          "avg_ns": 172650758755,
+          "stddev_ns": 9942946,
+          "avg_ts": 2.965524,
+          "stddev_ts": 0.00017,
+          "samples_ns": [
+            172656813127,
+            172656159222,
+            172639303918
+          ],
+          "samples_ts": [
+            2.96542,
+            2.96543,
+            2.96572
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 128,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:11:07Z",
+          "avg_ns": 281670650921,
+          "stddev_ns": 72882510,
+          "avg_ts": 1.817726,
+          "stddev_ts": 0.00047,
+          "samples_ns": [
+            281734945338,
+            281591479787,
+            281685527639
+          ],
+          "samples_ts": [
+            1.81731,
+            1.81824,
+            1.81763
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 128,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1431
+    },
+    {
+      "timestamp_utc": "2025-12-14T19:31:35.090569+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:25:14Z\",\n    \"avg_ns\": 43028685834,\n    \"stddev_ns\": 8954451,\n    \"avg_ts\": 2.974760,\n    \"stddev_ts\": 0.000619,\n    \"samples_ns\": [ 43020514323, 43027292910, 43038250271 ],\n    \"samples_ts\": [ 2.97532, 2.97486, 2.9741 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:28:06Z\",\n    \"avg_ns\": 69311099973,\n    \"stddev_ns\": 250003497,\n    \"avg_ts\": 1.846762,\n    \"stddev_ts\": 0.006675,\n    \"samples_ns\": [ 69022610471, 69464398203, 69446291247 ],\n    \"samples_ts\": [ 1.85446, 1.84267, 1.84315 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:25:14Z",
+          "avg_ns": 43028685834,
+          "stddev_ns": 8954451,
+          "avg_ts": 2.97476,
+          "stddev_ts": 0.000619,
+          "samples_ns": [
+            43020514323,
+            43027292910,
+            43038250271
+          ],
+          "samples_ts": [
+            2.97532,
+            2.97486,
+            2.9741
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:28:06Z",
+          "avg_ns": 69311099973,
+          "stddev_ns": 250003497,
+          "avg_ts": 1.846762,
+          "stddev_ts": 0.006675,
+          "samples_ns": [
+            69022610471,
+            69464398203,
+            69446291247
+          ],
+          "samples_ts": [
+            1.85446,
+            1.84267,
+            1.84315
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1432
+    },
+    {
+      "timestamp_utc": "2025-12-14T19:48:33.338531+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:31:36Z\",\n    \"avg_ns\": 43025541099,\n    \"stddev_ns\": 6347589,\n    \"avg_ts\": 2.974977,\n    \"stddev_ts\": 0.000438,\n    \"samples_ns\": [ 43027575467, 43030615045, 43018432787 ],\n    \"samples_ts\": [ 2.97484, 2.97463, 2.97547 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:34:28Z\",\n    \"avg_ns\": 281374070034,\n    \"stddev_ns\": 417207759,\n    \"avg_ts\": 1.819644,\n    \"stddev_ts\": 0.002700,\n    \"samples_ns\": [ 280892864249, 281594854319, 281634491535 ],\n    \"samples_ts\": [ 1.82276, 1.81822, 1.81796 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:31:36Z",
+          "avg_ns": 43025541099,
+          "stddev_ns": 6347589,
+          "avg_ts": 2.974977,
+          "stddev_ts": 0.000438,
+          "samples_ns": [
+            43027575467,
+            43030615045,
+            43018432787
+          ],
+          "samples_ts": [
+            2.97484,
+            2.97463,
+            2.97547
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:34:28Z",
+          "avg_ns": 281374070034,
+          "stddev_ns": 417207759,
+          "avg_ts": 1.819644,
+          "stddev_ts": 0.0027,
+          "samples_ns": [
+            280892864249,
+            281594854319,
+            281634491535
+          ],
+          "samples_ts": [
+            1.82276,
+            1.81822,
+            1.81796
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1433
+    },
+    {
+      "timestamp_utc": "2025-12-14T20:03:35.750691+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T19:48:34Z\",\n    \"avg_ns\": 172998923224,\n    \"stddev_ns\": 6203552,\n    \"avg_ts\": 2.959556,\n    \"stddev_ts\": 0.000106,\n    \"samples_ns\": [ 172998870733, 172992774035, 173005124906 ],\n    \"samples_ts\": [ 2.95956, 2.95966, 2.95945 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:00:06Z\",\n    \"avg_ns\": 69471240153,\n    \"stddev_ns\": 73774985,\n    \"avg_ts\": 1.842490,\n    \"stddev_ts\": 0.001957,\n    \"samples_ns\": [ 69395586091, 69542978195, 69475156175 ],\n    \"samples_ts\": [ 1.8445, 1.84059, 1.84239 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T19:48:34Z",
+          "avg_ns": 172998923224,
+          "stddev_ns": 6203552,
+          "avg_ts": 2.959556,
+          "stddev_ts": 0.000106,
+          "samples_ns": [
+            172998870733,
+            172992774035,
+            173005124906
+          ],
+          "samples_ts": [
+            2.95956,
+            2.95966,
+            2.95945
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:00:06Z",
+          "avg_ns": 69471240153,
+          "stddev_ns": 73774985,
+          "avg_ts": 1.84249,
+          "stddev_ts": 0.001957,
+          "samples_ns": [
+            69395586091,
+            69542978195,
+            69475156175
+          ],
+          "samples_ts": [
+            1.8445,
+            1.84059,
+            1.84239
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1434
+    },
+    {
+      "timestamp_utc": "2025-12-14T20:29:14.042027+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "256",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:03:37Z\",\n    \"avg_ns\": 173016997341,\n    \"stddev_ns\": 24933493,\n    \"avg_ts\": 2.959247,\n    \"stddev_ts\": 0.000426,\n    \"samples_ns\": [ 173006088361, 172999377501, 173045526161 ],\n    \"samples_ts\": [ 2.95943, 2.95955, 2.95876 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 256,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:15:09Z\",\n    \"avg_ns\": 281409577721,\n    \"stddev_ns\": 70250480,\n    \"avg_ts\": 1.819412,\n    \"stddev_ts\": 0.000454,\n    \"samples_ns\": [ 281349563812, 281392326713, 281486842640 ],\n    \"samples_ts\": [ 1.8198, 1.81952, 1.81891 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:03:37Z",
+          "avg_ns": 173016997341,
+          "stddev_ns": 24933493,
+          "avg_ts": 2.959247,
+          "stddev_ts": 0.000426,
+          "samples_ns": [
+            173006088361,
+            172999377501,
+            173045526161
+          ],
+          "samples_ts": [
+            2.95943,
+            2.95955,
+            2.95876
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 256,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:15:09Z",
+          "avg_ns": 281409577721,
+          "stddev_ns": 70250480,
+          "avg_ts": 1.819412,
+          "stddev_ts": 0.000454,
+          "samples_ns": [
+            281349563812,
+            281392326713,
+            281486842640
+          ],
+          "samples_ts": [
+            1.8198,
+            1.81952,
+            1.81891
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 256,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1435
+    },
+    {
+      "timestamp_utc": "2025-12-14T20:35:35.030293+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:29:15Z\",\n    \"avg_ns\": 43055073094,\n    \"stddev_ns\": 60515675,\n    \"avg_ts\": 2.972940,\n    \"stddev_ts\": 0.004175,\n    \"samples_ns\": [ 43124790530, 43024307823, 43016120929 ],\n    \"samples_ts\": [ 2.96813, 2.97506, 2.97563 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:32:07Z\",\n    \"avg_ns\": 68942703151,\n    \"stddev_ns\": 3093264821,\n    \"avg_ts\": 1.856704,\n    \"stddev_ts\": 0.015868,\n    \"samples_ns\": [ 69101490525, 69434277948, 68292340982 ],\n    \"samples_ts\": [ 1.85235, 1.84347, 1.8743 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:29:15Z",
+          "avg_ns": 43055073094,
+          "stddev_ns": 60515675,
+          "avg_ts": 2.97294,
+          "stddev_ts": 0.004175,
+          "samples_ns": [
+            43124790530,
+            43024307823,
+            43016120929
+          ],
+          "samples_ts": [
+            2.96813,
+            2.97506,
+            2.97563
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:32:07Z",
+          "avg_ns": 68942703151,
+          "stddev_ns": 3093264821,
+          "avg_ts": 1.856704,
+          "stddev_ts": 0.015868,
+          "samples_ns": [
+            69101490525,
+            69434277948,
+            68292340982
+          ],
+          "samples_ts": [
+            1.85235,
+            1.84347,
+            1.8743
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1436
+    },
+    {
+      "timestamp_utc": "2025-12-14T20:52:31.194438+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "128",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 128,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:35:36Z\",\n    \"avg_ns\": 43018131314,\n    \"stddev_ns\": 10331474,\n    \"avg_ts\": 2.975490,\n    \"stddev_ts\": 0.000715,\n    \"samples_ns\": [ 43008310210, 43028906916, 43017176816 ],\n    \"samples_ts\": [ 2.97617, 2.97474, 2.97556 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:38:28Z\",\n    \"avg_ns\": 280700059122,\n    \"stddev_ns\": 1341354458,\n    \"avg_ts\": 1.824039,\n    \"stddev_ts\": 0.008740,\n    \"samples_ns\": [ 279151207000, 281479389224, 281469581144 ],\n    \"samples_ts\": [ 1.83413, 1.81896, 1.81902 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 128,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:35:36Z",
+          "avg_ns": 43018131314,
+          "stddev_ns": 10331474,
+          "avg_ts": 2.97549,
+          "stddev_ts": 0.000715,
+          "samples_ns": [
+            43008310210,
+            43028906916,
+            43017176816
+          ],
+          "samples_ts": [
+            2.97617,
+            2.97474,
+            2.97556
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:38:28Z",
+          "avg_ns": 280700059122,
+          "stddev_ns": 1341354458,
+          "avg_ts": 1.824039,
+          "stddev_ts": 0.00874,
+          "samples_ns": [
+            279151207000,
+            281479389224,
+            281469581144
+          ],
+          "samples_ts": [
+            1.83413,
+            1.81896,
+            1.81902
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 128,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1437
+    },
+    {
+      "timestamp_utc": "2025-12-14T21:07:36.409571+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "128",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T20:52:32Z\",\n    \"avg_ns\": 173895856954,\n    \"stddev_ns\": 16354538,\n    \"avg_ts\": 2.944291,\n    \"stddev_ts\": 0.000277,\n    \"samples_ns\": [ 173887247163, 173885617891, 173914705810 ],\n    \"samples_ts\": [ 2.94444, 2.94446, 2.94397 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 128,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T21:04:08Z\",\n    \"avg_ns\": 69210838663,\n    \"stddev_ns\": 489761923,\n    \"avg_ts\": 1.849483,\n    \"stddev_ts\": 0.013141,\n    \"samples_ns\": [ 68645382905, 69485723608, 69501409477 ],\n    \"samples_ts\": [ 1.86466, 1.84211, 1.84169 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T20:52:32Z",
+          "avg_ns": 173895856954,
+          "stddev_ns": 16354538,
+          "avg_ts": 2.944291,
+          "stddev_ts": 0.000277,
+          "samples_ns": [
+            173887247163,
+            173885617891,
+            173914705810
+          ],
+          "samples_ts": [
+            2.94444,
+            2.94446,
+            2.94397
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 128,
+          "n_depth": 0,
+          "test_time": "2025-12-14T21:04:08Z",
+          "avg_ns": 69210838663,
+          "stddev_ns": 489761923,
+          "avg_ts": 1.849483,
+          "stddev_ts": 0.013141,
+          "samples_ns": [
+            68645382905,
+            69485723608,
+            69501409477
+          ],
+          "samples_ts": [
+            1.86466,
+            1.84211,
+            1.84169
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 128,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1438
+    },
+    {
+      "timestamp_utc": "2025-12-14T21:33:16.667917+00:00",
+      "command": [
+        "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench",
+        "--model",
+        "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "--threads",
+        "4",
+        "--batch-size",
+        "2048",
+        "--ubatch-size",
+        "512",
+        "--n-prompt",
+        "512",
+        "--n-gen",
+        "512",
+        "--repetitions",
+        "3",
+        "--output",
+        "json"
+      ],
+      "returncode": 0,
+      "stdout": "[\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 512,\n    \"n_gen\": 0,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T21:07:37Z\",\n    \"avg_ns\": 173913718845,\n    \"stddev_ns\": 13902901,\n    \"avg_ts\": 2.943989,\n    \"stddev_ts\": 0.000235,\n    \"samples_ns\": [ 173897680271, 173921611850, 173921864416 ],\n    \"samples_ts\": [ 2.94426, 2.94385, 2.94385 ]\n  },\n  {\n    \"build_commit\": \"2fa51c19b\",\n    \"build_number\": 7326,\n    \"cpu_info\": \"CPU\",\n    \"gpu_info\": \"\",\n    \"backends\": \"CPU\",\n    \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n    \"model_type\": \"gemma3 12B Q2_K - Medium\",\n    \"model_size\": 4761669504,\n    \"model_n_params\": 11766034176,\n    \"n_batch\": 2048,\n    \"n_ubatch\": 512,\n    \"n_threads\": 4,\n    \"cpu_mask\": \"0x0\",\n    \"cpu_strict\": false,\n    \"poll\": 50,\n    \"type_k\": \"f16\",\n    \"type_v\": \"f16\",\n    \"n_gpu_layers\": 99,\n    \"n_cpu_moe\": 0,\n    \"split_mode\": \"layer\",\n    \"main_gpu\": 0,\n    \"no_kv_offload\": false,\n    \"flash_attn\": false,\n    \"devices\": \"auto\",\n    \"tensor_split\": \"0.00\",\n    \"tensor_buft_overrides\": \"none\",\n    \"use_mmap\": true,\n    \"embeddings\": false,\n    \"no_op_offload\": 0,\n    \"no_host\": false,\n    \"n_prompt\": 0,\n    \"n_gen\": 512,\n    \"n_depth\": 0,\n    \"test_time\": \"2025-12-14T21:19:13Z\",\n    \"avg_ns\": 280856241486,\n    \"stddev_ns\": 646392997,\n    \"avg_ts\": 1.823003,\n    \"stddev_ts\": 0.004198,\n    \"samples_ns\": [ 280168316017, 280949419813, 281450988628 ],\n    \"samples_ts\": [ 1.82747, 1.82239, 1.81914 ]\n  }\n]\n",
+      "stderr": "",
+      "parsed": [
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 512,
+          "n_gen": 0,
+          "n_depth": 0,
+          "test_time": "2025-12-14T21:07:37Z",
+          "avg_ns": 173913718845,
+          "stddev_ns": 13902901,
+          "avg_ts": 2.943989,
+          "stddev_ts": 0.000235,
+          "samples_ns": [
+            173897680271,
+            173921611850,
+            173921864416
+          ],
+          "samples_ts": [
+            2.94426,
+            2.94385,
+            2.94385
+          ]
+        },
+        {
+          "build_commit": "2fa51c19b",
+          "build_number": 7326,
+          "cpu_info": "CPU",
+          "gpu_info": "",
+          "backends": "CPU",
+          "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+          "model_type": "gemma3 12B Q2_K - Medium",
+          "model_size": 4761669504,
+          "model_n_params": 11766034176,
+          "n_batch": 2048,
+          "n_ubatch": 512,
+          "n_threads": 4,
+          "cpu_mask": "0x0",
+          "cpu_strict": false,
+          "poll": 50,
+          "type_k": "f16",
+          "type_v": "f16",
+          "n_gpu_layers": 99,
+          "n_cpu_moe": 0,
+          "split_mode": "layer",
+          "main_gpu": 0,
+          "no_kv_offload": false,
+          "flash_attn": false,
+          "devices": "auto",
+          "tensor_split": "0.00",
+          "tensor_buft_overrides": "none",
+          "use_mmap": true,
+          "embeddings": false,
+          "no_op_offload": 0,
+          "no_host": false,
+          "n_prompt": 0,
+          "n_gen": 512,
+          "n_depth": 0,
+          "test_time": "2025-12-14T21:19:13Z",
+          "avg_ns": 280856241486,
+          "stddev_ns": 646392997,
+          "avg_ts": 1.823003,
+          "stddev_ts": 0.004198,
+          "samples_ns": [
+            280168316017,
+            280949419813,
+            281450988628
+          ],
+          "samples_ts": [
+            1.82747,
+            1.82239,
+            1.81914
+          ]
+        }
+      ],
+      "params": {
+        "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf",
+        "model_repo": "unsloth/gemma-3-12B-it-GGUF",
+        "quantization": "Q2_K_L",
+        "threads": 4,
+        "batch_size": 2048,
+        "ubatch_size": 512,
+        "n_prompt": 512,
+        "n_gen": 512,
+        "repetitions": 3,
+        "numa": null,
+        "priority": 0,
+        "progress": false
+      },
+      "run_index": 1439
+    }
+  ],
+  "plots": [
+    "/home/thomas/sunkiss/inference/result/throughput_vs_threads.png",
+    "/home/thomas/sunkiss/inference/result/throughput_vs_batch.png",
+    "/home/thomas/sunkiss/inference/result/latency_vs_threads.png"
+  ]
+}